aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c3
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/bio-integrity.c4
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/exec.c14
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/dev.c42
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namespace.c23
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nilfs2/the_nilfs.c1
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/blockcheck.c4
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/inode.c6
-rw-r--r--fs/ocfs2/mmap.c8
-rw-r--r--fs/ocfs2/namei.c302
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/suballoc.c219
-rw-r--r--fs/ocfs2/suballoc.h21
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_bmap.c14
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_vnodeops.c13
32 files changed, 602 insertions, 158 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 358563689064..6406f896bf95 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
}
kfree(wnames);
fid_out:
- v9fs_fid_add(dentry, fid);
+ if (!IS_ERR(fid))
+ v9fs_fid_add(dentry, fid);
err_out:
up_read(&v9ses->rename_sem);
return fid;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index a7528b913936..fd0cc0bf9a40 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void)
{
int err = register_filesystem(&bm_fs_type);
if (!err) {
- err = register_binfmt(&misc_format);
+ err = insert_binfmt(&misc_format);
if (err)
unregister_filesystem(&bm_fs_type);
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 612a5c38d3c1..4d0ff5ee27b8 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio)
/* Allocate kernel buffer for protection data */
len = sectors * blk_integrity_tuple_size(bi);
- buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+ buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
if (unlikely(buf == NULL)) {
printk(KERN_ERR "could not allocate integrity buffer\n");
- return -EIO;
+ return -ENOMEM;
}
end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 51f270b479b6..48d74c7391d1 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio)
int ret = 0;
if (dio->bio) {
- loff_t cur_offset = dio->block_in_file << dio->blkbits;
+ loff_t cur_offset = dio->cur_page_fs_offset;
loff_t bio_next_offset = dio->logical_offset_in_bio +
dio->bio->bi_size;
@@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio)
* Submit now if the underlying fs is about to perform a
* metadata read
*/
- if (dio->boundary)
+ else if (dio->boundary)
dio_bio_submit(dio);
}
diff --git a/fs/exec.c b/fs/exec.c
index 2d9455282744..828dd2461d6b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max)
argv++;
if (i++ >= max)
return -E2BIG;
+
+ if (fatal_signal_pending(current))
+ return -ERESTARTNOHAND;
cond_resched();
}
}
@@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv,
while (len > 0) {
int offset, bytes_to_copy;
+ if (fatal_signal_pending(current)) {
+ ret = -ERESTARTNOHAND;
+ goto out;
+ }
+ cond_resched();
+
offset = pos % PAGE_SIZE;
if (offset == 0)
offset = PAGE_SIZE;
@@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
#else
stack_top = arch_align_stack(stack_top);
stack_top = PAGE_ALIGN(stack_top);
+
+ if (unlikely(stack_top < mmap_min_addr) ||
+ unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
+ return -ENOMEM;
+
stack_shift = vma->vm_end - stack_top;
bprm->p -= stack_shift;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6769fd0f35b8..f8cc34f542c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync);
static int __init fcntl_init(void)
{
- /* please add new bits here to ensure allocation uniqueness */
- BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ /*
+ * Please add new bits here to ensure allocation uniqueness.
+ * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
+ * is defined as O_NONBLOCK on some platforms and not on others.
+ */
+ BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
- O_TRUNC | O_APPEND | O_NONBLOCK |
+ O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7d9d06ba184b..81e086d8aa57 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -808,7 +808,7 @@ int bdi_writeback_thread(void *data)
wb->last_active = jiffies;
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&bdi->work_list)) {
+ if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
continue;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 69ad053ffd78..d367af1514ef 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
* Called with fc->lock, unlocks it
*/
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
+__releases(fc->lock)
{
void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
req->end = NULL;
@@ -306,8 +306,8 @@ __releases(&fc->lock)
static void wait_answer_interruptible(struct fuse_conn *fc,
struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
if (signal_pending(current))
return;
@@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
}
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
if (!fc->no_interrupt) {
/* Any signal may interrupt this */
@@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc)
/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
DECLARE_WAITQUEUE(wait, current);
@@ -934,7 +934,7 @@ __acquires(&fc->lock)
*/
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
size_t nbytes, struct fuse_req *req)
-__releases(&fc->lock)
+__releases(fc->lock)
{
struct fuse_in_header ih;
struct fuse_interrupt_in arg;
@@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
* This function releases and reacquires fc->lock
*/
static void end_requests(struct fuse_conn *fc, struct list_head *head)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
while (!list_empty(head)) {
struct fuse_req *req;
@@ -1744,8 +1744,8 @@ __acquires(&fc->lock)
* locked).
*/
static void end_io_requests(struct fuse_conn *fc)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
while (!list_empty(&fc->io)) {
struct fuse_req *req =
@@ -1769,6 +1769,16 @@ __acquires(&fc->lock)
}
}
+static void end_queued_requests(struct fuse_conn *fc)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ fc->max_background = UINT_MAX;
+ flush_bg_queue(fc);
+ end_requests(fc, &fc->pending);
+ end_requests(fc, &fc->processing);
+}
+
/*
* Abort all requests.
*
@@ -1795,8 +1805,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
fc->connected = 0;
fc->blocked = 0;
end_io_requests(fc);
- end_requests(fc, &fc->pending);
- end_requests(fc, &fc->processing);
+ end_queued_requests(fc);
wake_up_all(&fc->waitq);
wake_up_all(&fc->blocked_waitq);
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1811,8 +1820,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
if (fc) {
spin_lock(&fc->lock);
fc->connected = 0;
- end_requests(fc, &fc->pending);
- end_requests(fc, &fc->processing);
+ fc->blocked = 0;
+ end_queued_requests(fc);
+ wake_up_all(&fc->blocked_waitq);
spin_unlock(&fc->lock);
fuse_conn_put(fc);
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 147c1f71bdb9..c8224587123f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
/* Called under fc->lock, may release and reacquire it */
static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
struct fuse_inode *fi = get_fuse_inode(req->inode);
loff_t size = i_size_read(req->inode);
@@ -1183,8 +1183,8 @@ __acquires(&fc->lock)
* Called with fc->lock
*/
void fuse_flush_writepages(struct inode *inode)
-__releases(&fc->lock)
-__acquires(&fc->lock)
+__releases(fc->lock)
+__acquires(fc->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index e20ee85955d1..f3f3578393a4 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
inode_inc_link_count(dir);
- inode = minix_new_inode(dir, mode, &err);
+ inode = minix_new_inode(dir, S_IFDIR | mode, &err);
if (!inode)
goto out_dir;
diff --git a/fs/namespace.c b/fs/namespace.c
index de402eb6eafb..a72eaabfe8f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1484,13 +1484,30 @@ out_unlock:
}
/*
+ * Sanity check the flags to change_mnt_propagation.
+ */
+
+static int flags_to_propagation_type(int flags)
+{
+ int type = flags & ~MS_REC;
+
+ /* Fail if any non-propagation flags are set */
+ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
+ return 0;
+ /* Only one propagation flag should be set */
+ if (!is_power_of_2(type))
+ return 0;
+ return type;
+}
+
+/*
* recursively change the type of the mountpoint.
*/
static int do_change_type(struct path *path, int flag)
{
struct vfsmount *m, *mnt = path->mnt;
int recurse = flag & MS_REC;
- int type = flag & ~MS_REC;
+ int type;
int err = 0;
if (!capable(CAP_SYS_ADMIN))
@@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag)
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ type = flags_to_propagation_type(flag);
+ if (!type)
+ return -EINVAL;
+
down_write(&namespace_sem);
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3dfef0623968..cf0d2ffb3c84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
static int nfs4_access_to_omode(u32 access)
{
- switch (access) {
+ switch (access & NFS4_SHARE_ACCESS_BOTH) {
case NFS4_SHARE_ACCESS_READ:
return O_RDONLY;
case NFS4_SHARE_ACCESS_WRITE:
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 4317f177ea7c..ba7c10c917fc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
nilfs_mdt_destroy(nilfs->ns_cpfile);
nilfs_mdt_destroy(nilfs->ns_sufile);
nilfs_mdt_destroy(nilfs->ns_dat);
+ nilfs_mdt_destroy(nilfs->ns_gc_dat);
failed:
nilfs_clear_recovery_info(&ri);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 215e12ce1d85..592fae5007d1 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_CACHE_SHIFT;
do {
- pages[numpages] = grab_cache_page(mapping, index);
+ pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
if (!pages[numpages]) {
ret = -ENOMEM;
mlog_errno(ret);
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index ec6d12339593..c7ee03c22226 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
ocfs2_blockcheck_inc_failure(stats);
mlog(ML_ERROR,
- "CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
+ "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
goto out;
}
- mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
+ mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 81296b4e3646..9a03c151b5ce 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -36,6 +36,7 @@
#include <linux/writeback.h>
#include <linux/falloc.h>
#include <linux/quotaops.h>
+#include <linux/blkdev.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
if (err)
goto bail;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
+ /*
+ * We still have to flush drive's caches to get data to the
+ * platter
+ */
+ if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+ NULL, BLKDEV_IFL_WAIT);
goto bail;
+ }
journal = osb->journal->j_journal;
err = jbd2_journal_force_commit(journal);
@@ -774,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
BUG_ON(abs_from & (inode->i_blkbits - 1));
- page = grab_cache_page(mapping, index);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (!page) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -2329,7 +2338,7 @@ out_dio:
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
- ((file->f_flags & O_DIRECT) && has_refcount)) {
+ ((file->f_flags & O_DIRECT) && !direct_io)) {
ret = filemap_fdatawrite_range(file->f_mapping, pos,
pos + count - 1);
if (ret < 0)
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0492464916b1..eece3e05d9d0 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
- if (!status)
+ /*
+ * If buffer is in jbd, then its checksum may not have been
+ * computed as yet.
+ */
+ if (!status && !buffer_jbd(bh))
status = ocfs2_validate_inode_block(osb->sb, bh);
}
if (status < 0) {
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index af2b8fe1f139..4c18f4ad93b4 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
/*
* Another node might have truncated while we were waiting on
* cluster locks.
+ * We don't check size == 0 before the shift. This is borrowed
+ * from do_generic_file_read.
*/
- last_index = size >> PAGE_CACHE_SHIFT;
- if (page->index > last_index) {
+ last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(!size || page->index > last_index)) {
ret = -EINVAL;
goto out;
}
@@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
* because the "write" would invalidate their data.
*/
if (page->index == last_index)
- len = size & ~PAGE_CACHE_MASK;
+ len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
&fsdata, di_bh, page);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f171b51a74f7..a00dda2e4f16 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -472,32 +472,23 @@ leave:
return status;
}
-static int ocfs2_mknod_locked(struct ocfs2_super *osb,
- struct inode *dir,
- struct inode *inode,
- dev_t dev,
- struct buffer_head **new_fe_bh,
- struct buffer_head *parent_fe_bh,
- handle_t *handle,
- struct ocfs2_alloc_context *inode_ac)
+static int __ocfs2_mknod_locked(struct inode *dir,
+ struct inode *inode,
+ dev_t dev,
+ struct buffer_head **new_fe_bh,
+ struct buffer_head *parent_fe_bh,
+ handle_t *handle,
+ struct ocfs2_alloc_context *inode_ac,
+ u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
{
int status = 0;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_dinode *fe = NULL;
struct ocfs2_extent_list *fel;
- u64 suballoc_loc, fe_blkno = 0;
- u16 suballoc_bit;
u16 feat;
*new_fe_bh = NULL;
- status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
- inode_ac, &suballoc_loc,
- &suballoc_bit, &fe_blkno);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
-
/* populate as many fields early on as possible - many of
* these are used by the support functions here and in
* callers. */
@@ -591,6 +582,34 @@ leave:
return status;
}
+static int ocfs2_mknod_locked(struct ocfs2_super *osb,
+ struct inode *dir,
+ struct inode *inode,
+ dev_t dev,
+ struct buffer_head **new_fe_bh,
+ struct buffer_head *parent_fe_bh,
+ handle_t *handle,
+ struct ocfs2_alloc_context *inode_ac)
+{
+ int status = 0;
+ u64 suballoc_loc, fe_blkno = 0;
+ u16 suballoc_bit;
+
+ *new_fe_bh = NULL;
+
+ status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
+ inode_ac, &suballoc_loc,
+ &suballoc_bit, &fe_blkno);
+ if (status < 0) {
+ mlog_errno(status);
+ return status;
+ }
+
+ return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
+ parent_fe_bh, handle, inode_ac,
+ fe_blkno, suballoc_loc, suballoc_bit);
+}
+
static int ocfs2_mkdir(struct inode *dir,
struct dentry *dentry,
int mode)
@@ -1852,61 +1871,117 @@ bail:
return status;
}
-static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
- struct inode **ret_orphan_dir,
- u64 blkno,
- char *name,
- struct ocfs2_dir_lookup_result *lookup)
+static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
+ struct inode **ret_orphan_dir,
+ struct buffer_head **ret_orphan_dir_bh)
{
struct inode *orphan_dir_inode;
struct buffer_head *orphan_dir_bh = NULL;
- int status = 0;
-
- status = ocfs2_blkno_stringify(blkno, name);
- if (status < 0) {
- mlog_errno(status);
- return status;
- }
+ int ret = 0;
orphan_dir_inode = ocfs2_get_system_file_inode(osb,
ORPHAN_DIR_SYSTEM_INODE,
osb->slot_num);
if (!orphan_dir_inode) {
- status = -ENOENT;
- mlog_errno(status);
- return status;
+ ret = -ENOENT;
+ mlog_errno(ret);
+ return ret;
}
mutex_lock(&orphan_dir_inode->i_mutex);
- status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
+ ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+ if (ret < 0) {
+ mutex_unlock(&orphan_dir_inode->i_mutex);
+ iput(orphan_dir_inode);
+
+ mlog_errno(ret);
+ return ret;
}
- status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
- orphan_dir_bh, name,
- OCFS2_ORPHAN_NAMELEN, lookup);
- if (status < 0) {
- ocfs2_inode_unlock(orphan_dir_inode, 1);
+ *ret_orphan_dir = orphan_dir_inode;
+ *ret_orphan_dir_bh = orphan_dir_bh;
- mlog_errno(status);
- goto leave;
+ return 0;
+}
+
+static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
+ struct buffer_head *orphan_dir_bh,
+ u64 blkno,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ int ret;
+ struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb);
+
+ ret = ocfs2_blkno_stringify(blkno, name);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
+ orphan_dir_bh, name,
+ OCFS2_ORPHAN_NAMELEN, lookup);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for
+ * insertion of an orphan.
+ * @osb: ocfs2 file system
+ * @ret_orphan_dir: Orphan dir inode - returned locked!
+ * @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @lookup: dir lookup result, to be passed back into functions like
+ * ocfs2_orphan_add
+ *
+ * Returns zero on success and the ret_orphan_dir, name and lookup
+ * fields will be populated.
+ *
+ * Returns non-zero on failure.
+ */
+static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
+ struct inode **ret_orphan_dir,
+ u64 blkno,
+ char *name,
+ struct ocfs2_dir_lookup_result *lookup)
+{
+ struct inode *orphan_dir_inode = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ int ret = 0;
+
+ ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode,
+ &orphan_dir_bh);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh,
+ blkno, name, lookup);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
}
*ret_orphan_dir = orphan_dir_inode;
-leave:
- if (status) {
+out:
+ brelse(orphan_dir_bh);
+
+ if (ret) {
+ ocfs2_inode_unlock(orphan_dir_inode, 1);
mutex_unlock(&orphan_dir_inode->i_mutex);
iput(orphan_dir_inode);
}
- brelse(orphan_dir_bh);
-
- mlog_exit(status);
- return status;
+ mlog_exit(ret);
+ return ret;
}
static int ocfs2_orphan_add(struct ocfs2_super *osb,
@@ -2053,6 +2128,99 @@ leave:
return status;
}
+/**
+ * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
+ * allocated file. This is different from the typical 'add to orphan dir'
+ * operation in that the inode does not yet exist. This is a problem because
+ * the orphan dir stringifies the inode block number to come up with its
+ * dirent. Obviously if the inode does not yet exist we have a chicken and egg
+ * problem. This function works around it by calling deeper into the orphan
+ * and suballoc code than other callers. Use this only by necessity.
+ * @dir: The directory which this inode will ultimately wind up under - not the
+ * orphan dir!
+ * @dir_bh: buffer_head of the @dir inode block
+ * @orphan_name: string of length (OCFS2_ORPHAN_NAMELEN + 1). Will be filled
+ * with the string to be used for orphan dirent. Pass back to the orphan dir
+ * code.
+ * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan
+ * dir code.
+ * @ret_di_blkno: block number where the new inode will be allocated.
+ * @orphan_insert: Dir insert context to be passed back into orphan dir code.
+ * @ret_inode_ac: Inode alloc context to be passed back to the allocator.
+ *
+ * Returns zero on success and the ret_orphan_dir, ret_di_blkno, ret_inode_ac,
+ * orphan_name and orphan_insert fields will be populated.
+ *
+ * Returns non-zero on failure, with the orphan dir unlocked and released.
+ */
+static int ocfs2_prep_new_orphaned_file(struct inode *dir,
+ struct buffer_head *dir_bh,
+ char *orphan_name,
+ struct inode **ret_orphan_dir,
+ u64 *ret_di_blkno,
+ struct ocfs2_dir_lookup_result *orphan_insert,
+ struct ocfs2_alloc_context **ret_inode_ac)
+{
+ int ret;
+ u64 di_blkno;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ struct inode *orphan_dir = NULL;
+ struct buffer_head *orphan_dir_bh = NULL;
+ struct ocfs2_alloc_context *inode_ac = NULL;
+
+ ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ /* reserve an inode spot */
+ ret = ocfs2_reserve_new_inode(osb, &inode_ac);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac,
+ &di_blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh,
+ di_blkno, orphan_name, orphan_insert);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+out:
+ if (ret == 0) {
+ *ret_orphan_dir = orphan_dir;
+ *ret_di_blkno = di_blkno;
+ *ret_inode_ac = inode_ac;
+ /*
+ * orphan_name and orphan_insert are already up to
+ * date via prepare_orphan_dir
+ */
+ } else {
+ /* Unroll reserve_new_inode* */
+ if (inode_ac)
+ ocfs2_free_alloc_context(inode_ac);
+
+ /* Unroll orphan dir locking */
+ mutex_unlock(&orphan_dir->i_mutex);
+ ocfs2_inode_unlock(orphan_dir, 1);
+ iput(orphan_dir);
+ }
+
+ brelse(orphan_dir_bh);
+
+ return ret;
+}
+
int ocfs2_create_inode_in_orphan(struct inode *dir,
int mode,
struct inode **new_inode)
@@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
struct buffer_head *new_di_bh = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
+ u64 uninitialized_var(di_blkno), suballoc_loc;
+ u16 suballoc_bit;
status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
if (status < 0) {
@@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
return status;
}
- /*
- * We give the orphan dir the root blkno to fake an orphan name,
- * and allocate enough space for our insertion.
- */
- status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
- osb->root_blkno,
- orphan_name, &orphan_insert);
- if (status < 0) {
- mlog_errno(status);
- goto leave;
- }
-
- /* reserve an inode spot */
- status = ocfs2_reserve_new_inode(osb, &inode_ac);
+ status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh,
+ orphan_name, &orphan_dir,
+ &di_blkno, &orphan_insert, &inode_ac);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
@@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
goto leave;
did_quota_inode = 1;
- inode->i_nlink = 0;
- /* do the real work now. */
- status = ocfs2_mknod_locked(osb, dir, inode,
- 0, &new_di_bh, parent_di_bh, handle,
- inode_ac);
+ status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
+ &suballoc_loc,
+ &suballoc_bit, di_blkno);
if (status < 0) {
mlog_errno(status);
goto leave;
}
- status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
+ inode->i_nlink = 0;
+ /* do the real work now. */
+ status = __ocfs2_mknod_locked(dir, inode,
+ 0, &new_di_bh, parent_di_bh, handle,
+ inode_ac, di_blkno, suballoc_loc,
+ suballoc_bit);
if (status < 0) {
mlog_errno(status);
goto leave;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 73a11ccfd4c2..0afeda83120f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (map_end & (PAGE_CACHE_SIZE - 1))
to = map_end & (PAGE_CACHE_SIZE - 1);
- page = grab_cache_page(mapping, page_index);
+ page = find_or_create_page(mapping, page_index, GFP_NOFS);
/*
* In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
if (map_end > end)
map_end = end;
- page = grab_cache_page(context->inode->i_mapping, page_index);
+ page = find_or_create_page(context->inode->i_mapping,
+ page_index, GFP_NOFS);
BUG_ON(!page);
wait_on_page_writeback(page);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index a8e6a95a353f..8a286f54dca1 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result {
u64 sr_bg_blkno; /* The bg we allocated from. Set
to 0 when a block group is
contiguous. */
+ u64 sr_bg_stable_blkno; /*
+ * Doesn't change, always
+ * set to target block
+ * group descriptor
+ * block.
+ */
u64 sr_blkno; /* The first allocated block */
unsigned int sr_bit_offset; /* The bit in the bg */
unsigned int sr_bits; /* How many bits we claimed */
};
+static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
+{
+ if (res->sr_blkno == 0)
+ return 0;
+
+ if (res->sr_bg_blkno)
+ return res->sr_bg_blkno;
+
+ return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
+}
+
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
brelse(ac->ac_bh);
ac->ac_bh = NULL;
ac->ac_resv = NULL;
+ if (ac->ac_find_loc_priv) {
+ kfree(ac->ac_find_loc_priv);
+ ac->ac_find_loc_priv = NULL;
+ }
}
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
if (!ret)
ocfs2_bg_discontig_fix_result(ac, gd, res);
+ /*
+ * sr_bg_blkno might have been changed by
+ * ocfs2_bg_discontig_fix_result
+ */
+ res->sr_bg_stable_blkno = group_bh->b_blocknr;
+
+ if (ac->ac_find_loc_only)
+ goto out_loc_only;
+
ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
res->sr_bits,
le16_to_cpu(gd->bg_chain));
@@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
if (ret < 0)
mlog_errno(ret);
+out_loc_only:
*bits_left = le16_to_cpu(gd->bg_free_bits_count);
out:
@@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{
int status;
u16 chain;
- u32 tmp_used;
u64 next_group;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
@@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
if (!status)
ocfs2_bg_discontig_fix_result(ac, bg, res);
+ /*
+ * sr_bg_blkno might have been changed by
+ * ocfs2_bg_discontig_fix_result
+ */
+ res->sr_bg_stable_blkno = group_bh->b_blocknr;
/*
* Keep track of previous block descriptor read. When
@@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
}
}
- /* Ok, claim our bits now: set the info on dinode, chainlist
- * and then the group */
- status = ocfs2_journal_access_di(handle,
- INODE_CACHE(alloc_inode),
- ac->ac_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (status < 0) {
+ if (ac->ac_find_loc_only)
+ goto out_loc_only;
+
+ status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
+ ac->ac_bh, res->sr_bits,
+ chain);
+ if (status) {
mlog_errno(status);
goto bail;
}
- tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
- fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
- le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
- ocfs2_journal_dirty(handle, ac->ac_bh);
-
status = ocfs2_block_group_set_bits(handle,
alloc_inode,
bg,
@@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
(unsigned long long)le64_to_cpu(fe->i_blkno));
+out_loc_only:
*bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
brelse(group_bh);
@@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
int status;
u16 victim, i;
u16 bits_left = 0;
+ u64 hint = ac->ac_last_group;
struct ocfs2_chain_list *cl;
struct ocfs2_dinode *fe;
@@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
goto bail;
}
- res->sr_bg_blkno = ac->ac_last_group;
+ res->sr_bg_blkno = hint;
if (res->sr_bg_blkno) {
/* Attempt to short-circuit the usual search mechanism
* by jumping straight to the most recently used
@@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
- if (!status)
+ if (!status) {
+ hint = ocfs2_group_from_res(res);
goto set_hint;
+ }
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
@@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
ac->ac_chain = i;
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
- if (!status)
+ if (!status) {
+ hint = ocfs2_group_from_res(res);
break;
+ }
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
@@ -1936,7 +1972,7 @@ set_hint:
if (bits_left < min_bits)
ac->ac_last_group = 0;
else
- ac->ac_last_group = res->sr_bg_blkno;
+ ac->ac_last_group = hint;
}
bail:
@@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
}
+/*
+ * Pick the location for a new inode without allocating it yet.
+ *
+ * Runs ocfs2_claim_suballoc_bits() for a single bit with
+ * ac->ac_find_loc_only set.  When the claim does not fail, the
+ * suballoc result is stored in ac->ac_find_loc_priv and its block
+ * number is written to *fe_blkno.
+ *
+ * @ac must be non-NULL, of type OCFS2_AC_USE_INODE, wanting exactly one
+ * bit with none given yet; anything else trips a BUG_ON().
+ *
+ * Returns the status of the claim, or -ENOMEM / the ocfs2_start_trans()
+ * error if setup fails.  The result buffer is freed here whenever the
+ * returned status is non-zero.
+ *
+ * NOTE(review): ac->ac_find_loc_only stays set when the claim fails, and
+ * nothing in this function releases ac->ac_find_loc_priv after a
+ * successful call -- confirm that the alloc context teardown covers both.
+ * NOTE(review): the claim is tested with "ret < 0" but the cleanup with
+ * "if (ret)"; a positive return would free res after it was published in
+ * ac->ac_find_loc_priv -- presumably the claim never returns > 0, verify.
+ */
+int ocfs2_find_new_inode_loc(struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ struct ocfs2_alloc_context *ac,
+ u64 *fe_blkno)
+{
+ int ret;
+ handle_t *handle = NULL;
+ struct ocfs2_suballoc_result *res;
+
+ BUG_ON(!ac);
+ BUG_ON(ac->ac_bits_given != 0);
+ BUG_ON(ac->ac_bits_wanted != 1);
+ BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
+
+ res = kzalloc(sizeof(*res), GFP_NOFS);
+ if (res == NULL) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
+
+ /*
+ * The handle started here is for chain relink. Alternatively,
+ * we could just disable relink for these calls.
+ */
+ handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ /* Clear the error pointer so the exit path skips the commit. */
+ handle = NULL;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * This will instruct ocfs2_claim_suballoc_bits and
+ * ocfs2_search_one_group to search but save actual allocation
+ * for later.
+ */
+ ac->ac_find_loc_only = 1;
+
+ ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Hand the result over to the alloc context for the later claim. */
+ ac->ac_find_loc_priv = res;
+ *fe_blkno = res->sr_blkno;
+
+out:
+ if (handle)
+ ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
+
+ /* res is only kept (via ac->ac_find_loc_priv) when ret is zero. */
+ if (ret)
+ kfree(res);
+
+ return ret;
+}
+
+/*
+ * Perform the allocation for a location previously returned by
+ * ocfs2_find_new_inode_loc().
+ *
+ * Works from the result saved in ac->ac_find_loc_priv: reads the group
+ * descriptor at sr_bg_stable_blkno, updates the allocator dinode counts
+ * for that group's chain, then sets sr_bits bits at sr_bit_offset in the
+ * group.  On success *suballoc_loc and *suballoc_bit receive sr_bg_blkno
+ * and sr_bit_offset, ac->ac_bits_given is incremented and the group is
+ * recorded for @dir via ocfs2_save_inode_ac_group().
+ *
+ * @di_blkno must equal the block number stored in the saved result;
+ * a mismatch trips a BUG_ON().
+ *
+ * Returns the error from the first failing step, otherwise the status of
+ * ocfs2_block_group_set_bits().
+ *
+ * NOTE(review): ac->ac_find_loc_priv is dereferenced without a NULL
+ * check -- assumes ocfs2_find_new_inode_loc() succeeded on this @ac.
+ * NOTE(review): if ocfs2_block_group_set_bits() fails, the dinode counts
+ * updated just before it are not reverted here -- confirm the caller's
+ * error handling makes that safe.
+ */
+int ocfs2_claim_new_inode_at_loc(handle_t *handle,
+ struct inode *dir,
+ struct ocfs2_alloc_context *ac,
+ u64 *suballoc_loc,
+ u16 *suballoc_bit,
+ u64 di_blkno)
+{
+ int ret;
+ u16 chain;
+ struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
+ struct buffer_head *bg_bh = NULL;
+ struct ocfs2_group_desc *bg;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
+
+ /*
+ * Since di_blkno is being passed back in, we check for any
+ * inconsistencies which may have happened between
+ * calls. These are code bugs as di_blkno is not expected to
+ * change once returned from ocfs2_find_new_inode_loc()
+ */
+ BUG_ON(res->sr_blkno != di_blkno);
+
+ ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
+ res->sr_bg_stable_blkno, &bg_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ /* The chain index comes from the group descriptor just read. */
+ chain = le16_to_cpu(bg->bg_chain);
+
+ ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
+ ac->ac_bh, res->sr_bits,
+ chain);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_block_group_set_bits(handle,
+ ac->ac_inode,
+ bg,
+ bg_bh,
+ res->sr_bit_offset,
+ res->sr_bits);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
+ (unsigned long long)di_blkno);
+
+ atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
+
+ BUG_ON(res->sr_bits != 1);
+
+ *suballoc_loc = res->sr_bg_blkno;
+ *suballoc_bit = res->sr_bit_offset;
+ ac->ac_bits_given++;
+ ocfs2_save_inode_ac_group(dir, ac);
+
+out:
+ /* bg_bh is still NULL if the descriptor read failed. */
+ brelse(bg_bh);
+
+ return ret;
+}
+
int ocfs2_claim_new_inode(handle_t *handle,
struct inode *dir,
struct buffer_head *parent_fe_bh,
@@ -2567,7 +2733,8 @@ out:
* suballoc_bit.
*/
static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
- u16 *suballoc_slot, u16 *suballoc_bit)
+ u16 *suballoc_slot, u64 *group_blkno,
+ u16 *suballoc_bit)
{
int status;
struct buffer_head *inode_bh = NULL;
@@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
if (suballoc_bit)
*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
+ if (group_blkno)
+ *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
bail:
brelse(inode_bh);
@@ -2621,7 +2790,8 @@ bail:
*/
static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
struct inode *suballoc,
- struct buffer_head *alloc_bh, u64 blkno,
+ struct buffer_head *alloc_bh,
+ u64 group_blkno, u64 blkno,
u16 bit, int *res)
{
struct ocfs2_dinode *alloc_di;
@@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
goto bail;
}
- if (alloc_di->i_suballoc_loc)
- bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
- else
- bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
+ bg_blkno = group_blkno ? group_blkno :
+ ocfs2_which_suballoc_group(blkno, bit);
status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
&group_bh);
if (status < 0) {
@@ -2680,6 +2848,7 @@ bail:
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
{
int status;
+ u64 group_blkno = 0;
u16 suballoc_bit = 0, suballoc_slot = 0;
struct inode *inode_alloc_inode;
struct buffer_head *alloc_bh = NULL;
@@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
mlog_entry("blkno: %llu", (unsigned long long)blkno);
status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
- &suballoc_bit);
+ &group_blkno, &suballoc_bit);
if (status < 0) {
mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
goto bail;
@@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
}
status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
- blkno, suballoc_bit, res);
+ group_blkno, blkno, suballoc_bit, res);
if (status < 0)
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a017dd3ee7d9..b8afabfeede4 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context {
u64 ac_max_block; /* Highest block number to allocate. 0 is
is the same as ~0 - unlimited */
+ int ac_find_loc_only; /* search only, allocate later; hack for reflink operation ordering */
+ struct ocfs2_suballoc_result *ac_find_loc_priv; /* result saved by ocfs2_find_new_inode_loc() */
+
struct ocfs2_alloc_reservation *ac_resv;
};
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
+
+
+
+/*
+ * The following two interfaces are for ocfs2_create_inode_in_orphan().
+ */
+int ocfs2_find_new_inode_loc(struct inode *dir,
+ struct buffer_head *parent_fe_bh,
+ struct ocfs2_alloc_context *ac,
+ u64 *fe_blkno);
+
+int ocfs2_claim_new_inode_at_loc(handle_t *handle,
+ struct inode *dir,
+ struct ocfs2_alloc_context *ac,
+ u64 *suballoc_loc,
+ u16 *suballoc_bit,
+ u64 di_blkno);
+
#endif /* _CHAINALLOC_H_ */
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 180cf5a0bd67..3b8b45660331 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
#endif
-#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
+#ifdef CONFIG_ARCH_USES_PG_UNCACHED
u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached);
#endif
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 439fc1f1c1c4..271afc48b9a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
if (vma->vm_flags & VM_GROWSDOWN)
- start += PAGE_SIZE;
+ if (!vma_stack_continue(vma->vm_prev, vma->vm_start))
+ start += PAGE_SIZE;
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
start,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1b27b5688f62..da3fefe91a8f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
char *p;
p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
- if (p)
+ if (!IS_ERR(p))
memmove(last_sysfs_file, p, strlen(p) + 1);
/* need attr_sd for attr and ops, its parent for kobj */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ea79072f5210..286e36e21dae 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -440,12 +440,7 @@ _xfs_buf_find(
ASSERT(btp == bp->b_target);
if (bp->b_file_offset == range_base &&
bp->b_buffer_length == range_length) {
- /*
- * If we look at something, bring it to the
- * front of the list for next time.
- */
atomic_inc(&bp->b_hold);
- list_move(&bp->b_hash_list, &hash->bh_list);
goto found;
}
}
@@ -1443,8 +1438,7 @@ xfs_alloc_bufhash(
{
unsigned int i;
- btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
- btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+ btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */
btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
sizeof(xfs_bufhash_t));
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
@@ -1938,7 +1932,8 @@ xfs_buf_init(void)
if (!xfs_buf_zone)
goto out;
- xfslogd_workqueue = create_workqueue("xfslogd");
+ xfslogd_workqueue = alloc_workqueue("xfslogd",
+ WQ_RESCUER | WQ_HIGHPRI, 1);
if (!xfslogd_workqueue)
goto out_free_buf_zone;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index d072e5ff923b..2a05614f0b92 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -137,7 +137,6 @@ typedef struct xfs_buftarg {
size_t bt_smask;
/* per device buffer hash table */
- uint bt_hashmask;
uint bt_hashshift;
xfs_bufhash_t *bt_hash;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 237f5ffb2ee8..3b9e626f7cd1 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr(
{
struct fsxattr fa;
+ memset(&fa, 0, sizeof(struct fsxattr));
+
xfs_ilock(ip, XFS_ILOCK_SHARED);
fa.fsx_xflags = xfs_ip2xflags(ip);
fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
@@ -907,6 +909,13 @@ xfs_ioctl_setattr(
return XFS_ERROR(EIO);
/*
+ * Disallow 32bit project ids because on-disk structure
+ * is 16bit only.
+ */
+ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+ return XFS_ERROR(EINVAL);
+
+ /*
* If disk quotas is on, we make sure that the dquots do exist on disk,
* before we start any other transactions. Trying to do this later
* is messy. We don't care to take a readlock to look at the ids
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 68be25dcd301..b1fc2a6bfe83 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -664,7 +664,7 @@ xfs_vn_fiemap(
fieinfo->fi_extents_max + 1;
bm.bmv_count = min_t(__s32, bm.bmv_count,
(PAGE_SIZE * 16 / sizeof(struct getbmapx)));
- bm.bmv_iflags = BMV_IF_PREALLOC;
+ bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
bm.bmv_iflags |= BMV_IF_ATTRFORK;
if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 23f14e595c18..f90dadd5a968 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5533,12 +5533,24 @@ xfs_getbmap(
map[i].br_startblock))
goto out_free_map;
- nexleft--;
bmv->bmv_offset =
out[cur_ext].bmv_offset +
out[cur_ext].bmv_length;
bmv->bmv_length =
max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+
+ /*
+ * In case we don't want to return the hole,
+ * don't increase cur_ext so that we can reuse
+ * it in the next loop.
+ */
+ if ((iflags & BMV_IF_NO_HOLES) &&
+ map[i].br_startblock == HOLESTARTBLOCK) {
+ memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
+ continue;
+ }
+
+ nexleft--;
bmv->bmv_entries++;
cur_ext++;
}
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 7cf7220e7d5f..87c2e9d02288 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -114,8 +114,10 @@ struct getbmapx {
#define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */
#define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */
#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */
+#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */
#define BMV_IF_VALID \
- (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC)
+ (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \
+ BMV_IF_DELALLOC|BMV_IF_NO_HOLES)
/* bmv_oflags values - returned for each non-header segment */
#define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 66d585c6917c..4c7c7bfb2b2f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -2299,15 +2299,22 @@ xfs_alloc_file_space(
e = allocatesize_fsb;
}
+ /*
+ * The transaction reservation is limited to a 32-bit block
+ * count, hence we need to limit the number of blocks we are
+ * trying to reserve to avoid an overflow. We can't allocate
+ * more than @nimaps extents, and an extent is limited on disk
+ * to MAXEXTLEN (21 bits), so use that to enforce the limit.
+ */
+ resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
if (unlikely(rt)) {
- resrtextents = qblocks = (uint)(e - s);
+ resrtextents = qblocks = resblks;
resrtextents /= mp->m_sb.sb_rextsize;
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
quota_flag = XFS_QMOPT_RES_RTBLKS;
} else {
resrtextents = 0;
- resblks = qblocks = \
- XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s));
+ resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
quota_flag = XFS_QMOPT_RES_REGBLKS;
}