aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/aio.c20
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c6
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c101
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c62
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c155
-rw-r--r--fs/ocfs2/cluster/heartbeat.c42
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dlmglue.c7
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c46
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
46 files changed, 1159 insertions, 246 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
- bool
-
config ARCH_BINFMT_ELF_STATE
bool
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_CACHEFILES) += cachefiles/
obj-$(CONFIG_DEBUG_FS) += debugfs/
+obj-$(CONFIG_TRACING) += tracefs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/aio.c b/fs/aio.c
index 8eece807abed..5785c4b58fea 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -310,11 +310,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
- int i;
+ int i, res = -EINVAL;
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
@@ -324,13 +324,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
ctx = table->table[i];
if (ctx && ctx->aio_ring_file == file) {
- ctx->user_id = ctx->mmap_base = vma->vm_start;
+ if (!atomic_read(&ctx->dead)) {
+ ctx->user_id = ctx->mmap_base = vma->vm_start;
+ res = 0;
+ }
break;
}
}
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
+ return res;
}
static const struct file_operations aio_ring_fops = {
@@ -759,6 +763,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err_cleanup:
aio_nr_sub(ctx->max_reqs);
err_ctx:
+ atomic_set(&ctx->dead, 1);
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
aio_free_ring(ctx);
err:
mutex_unlock(&ctx->ring_lock);
@@ -780,11 +787,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
{
struct kioctx_table *table;
- if (atomic_xchg(&ctx->dead, 1))
+ spin_lock(&mm->ioctx_lock);
+ if (atomic_xchg(&ctx->dead, 1)) {
+ spin_unlock(&mm->ioctx_lock);
return -EINVAL;
+ }
-
- spin_lock(&mm->ioctx_lock);
table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
- /* Memory randomization might have been switched off
- * in runtime via sysctl or explicit setting of
- * personality flags.
- * If that is the case, retain the original non-zero
- * load_bias value in order to establish proper
- * non-randomized mappings.
- */
+ load_bias = ELF_ET_DYN_BASE - vaddr;
if (current->flags & PF_RANDOMIZE)
- load_bias = 0;
- else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+ load_bias += arch_mmap_rnd();
+ load_bias = ELF_PAGESTART(load_bias);
+ total_size = total_mapping_size(elf_phdata,
+ loc->elf_ex.e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_free_dentry;
+ }
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
-#ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
+#ifdef compat_brk_randomized
current->brk_randomized = 1;
#endif
}
-#endif
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
- if (ret)
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ if (ret && TestClearPageDirty(page))
+ account_page_cleaned(page, mapping);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 480cf9c81d50..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
}
static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
length = atomic_inc_return(&tcpSesAllocCount);
if (length > 1)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
while (server->tcpStatus != CifsExiting) {
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..61e72d44cf94 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
pr_debug("debugfs: creating file '%s'\n",name);
+ if (IS_ERR(parent))
+ return parent;
+
error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
&debugfs_mount_count);
if (error)
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
if (error)
goto out;
-
- if (acl)
- set_cached_acl(inode, type, acl);
- else
- forget_cached_acl(inode, type);
+ set_cached_acl(inode, type, acl);
out:
kfree(data);
return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index fe6634d25d1d..a6e6990aea39 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (alloc_required) {
struct gfs2_alloc_parms ap = { .aflags = 0, };
- error = gfs2_quota_lock_check(ip);
+ requested = data_blocks + ind_blocks;
+ ap.target = requested;
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out_unlock;
- requested = data_blocks + ind_blocks;
- ap.target = requested;
error = gfs2_inplace_reserve(ip, &ap);
if (error)
goto out_qunlock;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
if (gfs2_is_stuffed(ip) &&
(size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 614bb42cb7e1..207eb4a8135e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -428,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_unlock;
- ret = gfs2_quota_lock_check(ip);
- if (ret)
- goto out_unlock;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
ap.target = data_blocks + ind_blocks;
+ ret = gfs2_quota_lock_check(ip, &ap);
+ if (ret)
+ goto out_unlock;
ret = gfs2_inplace_reserve(ip, &ap);
if (ret)
goto out_quota_unlock;
@@ -764,22 +764,30 @@ out:
brelse(dibh);
return error;
}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
- unsigned int *data_blocks, unsigned int *ind_blocks)
+/**
+ * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
+ * blocks, determine how many bytes can be written.
+ * @ip: The inode in question.
+ * @len: Max cap of bytes. What we return in *len must be <= this.
+ * @data_blocks: Compute and return the number of data blocks needed
+ * @ind_blocks: Compute and return the number of indirect blocks needed
+ * @max_blocks: The total blocks available to work with.
+ *
+ * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
+ */
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
+ unsigned int *data_blocks, unsigned int *ind_blocks,
+ unsigned int max_blocks)
{
+ loff_t max = *len;
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_rgd->rd_free_clone;
unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
for (tmp = max_data; tmp > sdp->sd_diptrs;) {
tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
max_data -= tmp;
}
- /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
- so it might end up with fewer data blocks */
- if (max_data <= *data_blocks)
- return;
+
*data_blocks = max_data;
*ind_blocks = max_blocks - max_data;
*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -796,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
- loff_t bytes, max_bytes;
+ loff_t bytes, max_bytes, max_blks = UINT_MAX;
int error;
const loff_t pos = offset;
const loff_t count = len;
@@ -818,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
gfs2_size_hint(file, offset, len);
+ gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+ ap.min_target = data_blocks + ind_blocks;
+
while (len > 0) {
if (len < bytes)
bytes = len;
@@ -826,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
offset += bytes;
continue;
}
- error = gfs2_quota_lock_check(ip);
+
+ /* We need to determine how many bytes we can actually
+ * fallocate without exceeding quota or going over the
+ * end of the fs. We start off optimistically by assuming
+ * we can write max_bytes */
+ max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
+
+ /* Since max_bytes is most likely a theoretical max, we
+ * calculate a more realistic 'bytes' to serve as a good
+ * starting point for the number of bytes we may be able
+ * to write */
+ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ ap.target = data_blocks + ind_blocks;
+
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
-retry:
- gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ /* ap.allowed tells us how many blocks quota will allow
+ * us to write. Check if this reduces max_blks */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
- ap.target = data_blocks + ind_blocks;
error = gfs2_inplace_reserve(ip, &ap);
- if (error) {
- if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
- bytes >>= 1;
- bytes &= bsize_mask;
- if (bytes == 0)
- bytes = sdp->sd_sb.sb_bsize;
- goto retry;
- }
+ if (error)
goto out_qunlock;
- }
- max_bytes = bytes;
- calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
- &max_bytes, &data_blocks, &ind_blocks);
+
+ /* check if the selected rgrp limits our max_blks further */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
+
+ /* Almost done. Calculate bytes that can be written using
+ * max_blks. We also recompute max_bytes, data_blocks and
+ * ind_blocks */
+ calc_max_reserv(ip, &max_bytes, &data_blocks,
+ &ind_blocks, max_blks);
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -930,6 +955,22 @@ out_uninit:
return ret;
}
+static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ int error;
+ struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
+
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ return (ssize_t)error;
+
+ gfs2_size_hint(out, *ppos, len);
+
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -1074,7 +1115,7 @@ const struct file_operations gfs2_file_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
};
@@ -1102,7 +1143,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
- sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
- if (!sdp->debugfs_dir)
- return -ENOMEM;
- sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glocks_fops);
- if (!sdp->debugfs_dentry_glocks)
+ struct dentry *dent;
+
+ dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
+ if (IS_ERR_OR_NULL(dent))
+ goto fail;
+ sdp->debugfs_dir = dent;
+
+ dent = debugfs_create_file("glocks",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glocks_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glocks = dent;
- sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glstats_fops);
- if (!sdp->debugfs_dentry_glstats)
+ dent = debugfs_create_file("glstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glstats = dent;
- sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_sbstats_fops);
- if (!sdp->debugfs_dentry_sbstats)
+ dent = debugfs_create_file("sbstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_sbstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_sbstats = dent;
return 0;
fail:
gfs2_delete_debugfs_file(sdp);
- return -ENOMEM;
+ return dent ? PTR_ERR(dent) : -ENOMEM;
}
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
int gfs2_register_debugfs(void)
{
gfs2_root = debugfs_create_dir("gfs2", NULL);
+ if (IS_ERR(gfs2_root))
+ return PTR_ERR(gfs2_root);
return gfs2_root ? 0 : -ENOMEM;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
* to the allocation code.
*/
struct gfs2_alloc_parms {
- u32 target;
+ u64 target;
+ u32 min_target;
u32 aflags;
+ u64 allowed;
};
enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
int error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out;
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
int error;
if (da->nr_blocks) {
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto fail_quota_locks;
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto out_gunlock;
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(ndip);
+ error = gfs2_quota_lock_check(ndip, &ap);
if (error)
goto out_gunlock;
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
kuid_t ouid, nuid;
kgid_t ogid, ngid;
int error;
+ struct gfs2_alloc_parms ap;
ouid = inode->i_uid;
ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (error)
goto out;
+ ap.target = gfs2_get_inode_blocks(&ip->i_inode);
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- error = gfs2_quota_check(ip, nuid, ngid);
+ error = gfs2_quota_check(ip, nuid, ngid, &ap);
if (error)
goto out_gunlock_q;
}
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
- gfs2_quota_change(ip, -blocks, ouid, ogid);
- gfs2_quota_change(ip, blocks, nuid, ngid);
+ gfs2_quota_change(ip, -ap.target, ouid, ogid);
+ gfs2_quota_change(ip, ap.target, nuid, ngid);
}
out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..5c27e48aa76f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
if (error)
return error;
+ if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+ force_refresh = FORCE;
+
qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
sizeof(struct gfs2_quota_data *), sort_qd, NULL);
for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- int force = NO_FORCE;
qd = ip->i_res->rs_qa_qd[x];
- if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
- force = FORCE;
- error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
+ error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
if (error)
break;
}
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
return 0;
}
-int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
+/**
+ * gfs2_quota_check - check if allocating new blocks will exceed quota
+ * @ip: The inode for which this check is being performed
+ * @uid: The uid to check against
+ * @gid: The gid to check against
+ * @ap: The allocation parameters. ap->target contains the requested
+ * blocks. ap->min_target, if set, contains the minimum blks
+ * requested.
+ *
+ * Returns: 0 on success.
+ * min_req = ap->min_target ? ap->min_target : ap->target;
+ * quota must allow atleast min_req blks for success and
+ * ap->allowed is set to the number of blocks allowed
+ *
+ * -EDQUOT otherwise, quota violation. ap->allowed is set to number
+ * of blocks available.
+ */
+int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
- s64 value;
+ s64 value, warn, limit;
unsigned int x;
int error = 0;
+ ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
return 0;
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
qid_eq(qd->qd_id, make_kqid_gid(gid))))
continue;
+ warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
+ limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
spin_lock(&qd_lock);
value += qd->qd_change;
spin_unlock(&qd_lock);
- if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
- print_message(qd, "exceeded");
- quota_send_warning(qd->qd_id,
- sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
-
- error = -EDQUOT;
- break;
- } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
- (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
+ if (limit > 0 && (limit - value) < ap->allowed)
+ ap->allowed = limit - value;
+ /* If we can't meet the target */
+ if (limit && limit < (value + (s64)ap->target)) {
+ /* If no min_target specified or we don't meet
+ * min_target, return -EDQUOT */
+ if (!ap->min_target || ap->min_target > ap->allowed) {
+ print_message(qd, "exceeded");
+ quota_send_warning(qd->qd_id,
+ sdp->sd_vfs->s_dev,
+ QUOTA_NL_BHARDWARN);
+ error = -EDQUOT;
+ break;
+ }
+ } else if (warn && warn < value &&
time_after_eq(jiffies, qd->qd_last_warn +
- gfs2_tune_get(sdp,
- gt_quota_warn_period) * HZ)) {
+ gfs2_tune_get(sdp, gt_quota_warn_period)
+ * HZ)) {
quota_send_warning(qd->qd_id,
sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
error = print_message(qd, "warning");
qd->qd_last_warn = jiffies;
}
}
-
return error;
}
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unlock(struct gfs2_inode *ip);
-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
kuid_t uid, kgid_t gid);
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
-static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
+static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
- ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
+ ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
if (ret)
gfs2_quota_unlock(ip);
return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
* @ip: the inode to reserve space for
* @ap: the allocation parameters
*
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * the number of blocks available in the chosen rgrp.
+ *
+ * Returns: 0 on success,
+ * -ENOMEM if a suitable rgrp can't be found
+ * errno otherwise
*/
-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
/* Skip unuseable resource groups */
if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
GFS2_RDF_ERROR)) ||
- (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+ (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+ if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+ (loops == 2 && ap->min_target &&
+ rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
ip->i_rgd = rs->rs_rbm.rgd;
+ ap->allowed = ip->i_rgd->rd_free_clone;
return 0;
}
-
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8c2dad629e7c..45e34908bdb5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -294,7 +294,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
static void truncate_huge_page(struct page *page)
{
- cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
+ ClearPageDirty(page);
ClearPageUptodate(page);
delete_from_page_cache(page);
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d72817ac51f6..762c7a3cf43d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
/* unchecked xdatum is chained with c->xattr_unchecked */
list_del_init(&xd->xindex);
- dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n",
+ dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
xd->xid, xd->version);
return 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("ERROR: (device %s): %pf: %pV\n",
+ pr_err("ERROR: (device %s): %ps: %pV\n",
sb->s_id, __builtin_return_address(0), &vaf);
va_end(args);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 849ed784d6ac..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1876,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
* request from the inode / page_private pointer and
* release it */
nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
nfs_unlock_and_release_request(req);
}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
ret = ocfs2_get_right_path(et, left_path, &right_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
subtree_index);
}
out:
- if (right_path)
- ocfs2_free_path(right_path);
+ ocfs2_free_path(right_path);
return ret;
}
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
ret = ocfs2_get_left_path(et, right_path, &left_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
right_path, subtree_index);
}
out:
- if (left_path)
- ocfs2_free_path(left_path);
+ ocfs2_free_path(left_path);
return ret;
}
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
} else if (path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
if (status)
- goto out;
+ goto exit;
if (left_cpos != 0) {
left_path = ocfs2_new_path_from_path(path);
if (!left_path)
- goto out;
+ goto exit;
status = ocfs2_find_path(et->et_ci, left_path,
left_cpos);
if (status)
- goto out;
+ goto free_left_path;
new_el = path_leaf_el(left_path);
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
le16_to_cpu(new_el->l_next_free_rec),
le16_to_cpu(new_el->l_count));
status = -EINVAL;
- goto out;
+ goto free_left_path;
}
rec = &new_el->l_recs[
le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
if (status)
- goto out;
+ goto free_left_path;
if (right_cpos == 0)
- goto out;
+ goto free_left_path;
right_path = ocfs2_new_path_from_path(path);
if (!right_path)
- goto out;
+ goto free_left_path;
status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
if (status)
- goto out;
+ goto free_right_path;
new_el = path_leaf_el(right_path);
rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
(unsigned long long)le64_to_cpu(eb->h_blkno),
le16_to_cpu(new_el->l_next_free_rec));
status = -EINVAL;
- goto out;
+ goto free_right_path;
}
rec = &new_el->l_recs[1];
}
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
ret = contig_type;
}
-out:
- if (left_path)
- ocfs2_free_path(left_path);
- if (right_path)
- ocfs2_free_path(right_path);
-
+free_right_path:
+ ocfs2_free_path(right_path);
+free_left_path:
+ ocfs2_free_path(left_path);
+exit:
return ret;
}
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
if (pages == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
- goto out;
+ return ret;
}
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto free_pages;
}
}
@@ -6996,9 +6993,8 @@ out_commit:
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
- if (pages)
- kfree(pages);
-
+free_pages:
+ kfree(pages);
return ret;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e1bf18c5d25e..8d2bc840c288 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -664,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
return 0;
}
+static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset,
+ u64 zero_len, int cluster_align)
+{
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ int ret = 0;
+
+ if (offset <= i_size_read(inode) || cluster_align)
+ return 0;
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ u64 s = i_size_read(inode);
+ sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
+ (do_div(s, osb->s_clustersize) >> 9);
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
+ zero_len >> 9, GFP_NOFS, false);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+ return ret;
+}
+
+static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset)
+{
+ u64 zero_start, zero_len, total_zero_len;
+ u32 p_cpos = 0, clusters_to_add;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ u32 size_div, offset_div;
+ int ret = 0;
+
+ {
+ u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ offset_div = do_div(o, osb->s_clustersize);
+ size_div = do_div(s, osb->s_clustersize);
+ }
+
+ if (offset <= i_size_read(inode))
+ return 0;
+
+ clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
+ ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
+ total_zero_len = offset - i_size_read(inode);
+ if (clusters_to_add)
+ total_zero_len -= offset_div;
+
+ /* Allocate clusters to fill out holes, and this is only needed
+ * when we add more than one clusters. Otherwise the cluster will
+ * be allocated during direct IO */
+ if (clusters_to_add > 1) {
+ ret = ocfs2_extend_allocation(inode,
+ OCFS2_I(inode)->ip_clusters,
+ clusters_to_add - 1, 0);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ while (total_zero_len) {
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
+ size_div;
+ zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
+ size_div;
+ zero_len = min(total_zero_len, zero_len);
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ zero_start >> 9, zero_len >> 9,
+ GFP_NOFS, false);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ total_zero_len -= zero_len;
+ v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
+
+ /* Only at first iteration can be cluster not aligned.
+ * So set size_div to 0 for the rest */
+ size_div = 0;
+ }
+
+out:
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset)
@@ -678,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct buffer_head *di_bh = NULL;
size_t count = iter->count;
journal_t *journal = osb->journal->j_journal;
- u32 zero_len;
- int cluster_align;
+ u64 zero_len_head, zero_len_tail;
+ int cluster_align_head, cluster_align_tail;
loff_t final_size = offset + count;
int append_write = offset >= i_size_read(inode) ? 1 : 0;
unsigned int num_clusters = 0;
@@ -687,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
{
u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align_head = !zero_len_head;
- zero_len = do_div(o, 1 << osb->s_clustersize_bits);
- cluster_align = !zero_len;
+ zero_len_tail = osb->s_clustersize -
+ do_div(s, osb->s_clustersize);
+ if ((offset - i_size_read(inode)) < zero_len_tail)
+ zero_len_tail = offset - i_size_read(inode);
+ cluster_align_tail = !zero_len_tail;
}
/*
@@ -707,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
}
if (append_write) {
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto clean_orphan;
}
+ /* zeroing out the previously allocated cluster tail
+ * that but not zeroed */
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_zero_extend(inode, di_bh, offset);
+ ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
+ zero_len_tail, cluster_align_tail);
else
- ret = ocfs2_extend_no_holes(inode, di_bh, offset,
+ ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
offset);
if (ret < 0) {
mlog_errno(ret);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
@@ -729,13 +849,10 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (is_overwrite < 0) {
mlog_errno(is_overwrite);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- di_bh = NULL;
}
written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
@@ -772,15 +889,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (ret < 0)
mlog_errno(ret);
}
- } else if (written < 0 && append_write && !is_overwrite &&
- !cluster_align) {
+ } else if (written > 0 && append_write && !is_overwrite &&
+ !cluster_align_head) {
+ /* zeroing out the allocated cluster head */
u32 p_cpos = 0;
u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
&num_clusters, &ext_flags);
if (ret < 0) {
mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 0);
goto clean_orphan;
}
@@ -788,9 +913,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
p_cpos << (osb->s_clustersize_bits - 9),
- zero_len >> 9, GFP_KERNEL, false);
+ zero_len_head >> 9, GFP_NOFS, false);
if (ret < 0)
mlog_errno(ret);
+
+ ocfs2_inode_unlock(inode, 0);
}
clean_orphan:
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 16eff45727ee..8e19b9d7aba8 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1312,7 +1312,9 @@ static int o2hb_debug_init(void)
int ret = -ENOMEM;
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
- if (!o2hb_debug_dir) {
+ if (IS_ERR_OR_NULL(o2hb_debug_dir)) {
+ ret = o2hb_debug_dir ?
+ PTR_ERR(o2hb_debug_dir) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1325,7 +1327,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_live_node_bitmap),
O2NM_MAX_NODES,
o2hb_live_node_bitmap);
- if (!o2hb_debug_livenodes) {
+ if (IS_ERR_OR_NULL(o2hb_debug_livenodes)) {
+ ret = o2hb_debug_livenodes ?
+ PTR_ERR(o2hb_debug_livenodes) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1338,7 +1342,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_live_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_live_region_bitmap);
- if (!o2hb_debug_liveregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_liveregions)) {
+ ret = o2hb_debug_liveregions ?
+ PTR_ERR(o2hb_debug_liveregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1352,7 +1358,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_quorum_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_quorum_region_bitmap);
- if (!o2hb_debug_quorumregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_quorumregions)) {
+ ret = o2hb_debug_quorumregions ?
+ PTR_ERR(o2hb_debug_quorumregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -1366,7 +1374,9 @@ static int o2hb_debug_init(void)
sizeof(o2hb_failed_region_bitmap),
O2NM_MAX_REGIONS,
o2hb_failed_region_bitmap);
- if (!o2hb_debug_failedregions) {
+ if (IS_ERR_OR_NULL(o2hb_debug_failedregions)) {
+ ret = o2hb_debug_failedregions ?
+ PTR_ERR(o2hb_debug_failedregions) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2000,7 +2010,8 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
reg->hr_debug_dir =
debugfs_create_dir(config_item_name(&reg->hr_item), dir);
- if (!reg->hr_debug_dir) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_dir)) {
+ ret = reg->hr_debug_dir ? PTR_ERR(reg->hr_debug_dir) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2013,7 +2024,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
O2HB_DB_TYPE_REGION_LIVENODES,
sizeof(reg->hr_live_node_bitmap),
O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_livenodes) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_livenodes)) {
+ ret = reg->hr_debug_livenodes ?
+ PTR_ERR(reg->hr_debug_livenodes) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2025,7 +2038,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_regnum)),
O2HB_DB_TYPE_REGION_NUMBER,
0, O2NM_MAX_NODES, reg);
- if (!reg->hr_debug_regnum) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_regnum)) {
+ ret = reg->hr_debug_regnum ?
+ PTR_ERR(reg->hr_debug_regnum) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2037,7 +2052,9 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_elapsed_time)),
O2HB_DB_TYPE_REGION_ELAPSED_TIME,
0, 0, reg);
- if (!reg->hr_debug_elapsed_time) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_elapsed_time)) {
+ ret = reg->hr_debug_elapsed_time ?
+ PTR_ERR(reg->hr_debug_elapsed_time) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
@@ -2049,13 +2066,16 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir)
sizeof(*(reg->hr_db_pinned)),
O2HB_DB_TYPE_REGION_PINNED,
0, 0, reg);
- if (!reg->hr_debug_pinned) {
+ if (IS_ERR_OR_NULL(reg->hr_debug_pinned)) {
+ ret = reg->hr_debug_pinned ?
+ PTR_ERR(reg->hr_debug_pinned) : -ENOMEM;
mlog_errno(ret);
goto bail;
}
- ret = 0;
+ return 0;
bail:
+ debugfs_remove_recursive(reg->hr_debug_dir);
return ret;
}
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
} \
} while (0)
-#define mlog_errno(st) do { \
+#define mlog_errno(st) ({ \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
_st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
_st != -EDQUOT) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
-} while (0)
+ _st; \
+})
#define mlog_bug_on_msg(cond, fmt, args...) do { \
if (cond) { \
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
*
* linux/fs/minix/dir.c
*
- * Copyright (C) 1991, 1992 Linux Torvalds
+ * Copyright (C) 1991, 1992 Linus Torvalds
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen)
{
- int ret;
+ int ret = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
trace_ocfs2_check_dir_for_entry(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
- ret = -EEXIST;
- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
- goto bail;
+ if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+ ret = -EEXIST;
+ mlog_errno(ret);
+ }
- ret = 0;
-bail:
ocfs2_free_dir_lookup_result(&lookup);
- if (ret)
- mlog_errno(ret);
return ret;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..956edf67be20 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
int noqueue_attempted = 0;
int dlm_locked = 0;
+ if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
+ mlog_errno(-EINVAL);
+ return -EINVAL;
+ }
+
ocfs2_init_mask_waiter(&mw);
if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
@@ -2954,7 +2959,7 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
osb->osb_debug_root,
osb,
&ocfs2_dlm_debug_fops);
- if (!dlm_debug->d_locking_state) {
+ if (IS_ERR_OR_NULL(dlm_debug->d_locking_state)) {
ret = -EINVAL;
mlog(ML_ERROR,
"Unable to create locking state debugfs file.\n");
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
- trace_ocfs2_get_dentry_test_bit(status, set);
if (status < 0) {
if (status == -EINVAL) {
/*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
goto unlock_nfs_sync;
}
+ trace_ocfs2_get_dentry_test_bit(status, set);
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0a6ec7e6efd8..8c48e989beba 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2392,7 +2392,6 @@ relock:
/*
* for completing the rest of the request.
*/
- *ppos += written;
count -= written;
written_buffered = generic_perform_write(file, from, *ppos);
/*
@@ -2407,7 +2406,6 @@ relock:
goto out_dio;
}
- iocb->ki_pos = *ppos + written_buffered;
/* We need to ensure that the page cache pages are written to
* disk and invalidated to preserve the expected O_DIRECT
* semantics.
@@ -2416,6 +2414,7 @@ relock:
ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
endbyte);
if (ret == 0) {
+ iocb->ki_pos = *ppos + written_buffered;
written += written_buffered;
invalidate_mapping_pages(mapping,
*ppos >> PAGE_CACHE_SHIFT,
@@ -2438,10 +2437,14 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+ if (unlikely(written <= 0))
+ goto no_sync;
+
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
if (ret < 0)
written = ret;
@@ -2452,10 +2455,12 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
+no_sync:
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
le16_to_cpu(di->i_suballoc_slot));
if (!inode_alloc_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
ORPHAN_DIR_SYSTEM_INODE,
orphaned_slot);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
ocfs2_local_alloc_count_bits(alloc)) {
ocfs2_error(osb->sb, "local alloc inode %llu says it has "
- "%u free bits, but a count shows %u",
+ "%u used bits, but a count shows %u",
(unsigned long long)le64_to_cpu(alloc->i_blkno),
le32_to_cpu(alloc->id1.bitmap1.i_used),
ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
- int numfound, bitoff, left, startoff, lastzero;
+ int numfound = 0, bitoff, left, startoff, lastzero;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
trace_ocfs2_orphan_del(
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
- name, namelen);
+ name, strlen(name));
/* find it's spot in the orphan directory */
- status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
+ status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
&lookup);
if (status) {
mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
ORPHAN_DIR_SYSTEM_INODE,
osb->slot_num);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto leave;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error) {
mlog_errno(error);
- goto out;
+ return error;
}
error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
if (!si) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ return status;
}
si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
osb->slot_info = (struct ocfs2_slot_info *)si;
bail:
- if (status < 0 && si)
+ if (status < 0)
__ocfs2_free_slot_info(si);
return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
set_bit(node_num, netmap);
if (!memcmp(hbmap, netmap, sizeof(hbmap)))
return 0;
- if (i < O2CB_MAP_STABILIZE_COUNT)
+ if (i < O2CB_MAP_STABILIZE_COUNT - 1)
msleep(1000);
}
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
BUG_ON(conn == NULL);
lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
- if (!lc) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!lc)
+ return -ENOMEM;
init_waitqueue_head(&lc->oc_wait);
init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
}
out:
- if (rc && lc)
+ if (rc)
kfree(lc);
return rc;
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
+ ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
+ start_bit, count);
goto bail;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..837ddce4b659 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1112,7 +1112,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
- if (!osb->osb_debug_root) {
+ if (IS_ERR_OR_NULL(osb->osb_debug_root)) {
status = -EINVAL;
mlog(ML_ERROR, "Unable to create per-mount debugfs root.\n");
goto read_super_error;
@@ -1122,7 +1122,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->osb_debug_root,
osb,
&ocfs2_osb_debug_fops);
- if (!osb->osb_ctxt) {
+ if (IS_ERR_OR_NULL(osb->osb_ctxt)) {
status = -EINVAL;
mlog_errno(status);
goto read_super_error;
@@ -1606,8 +1606,9 @@ static int __init ocfs2_init(void)
}
ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
- if (!ocfs2_debugfs_root) {
- status = -ENOMEM;
+ if (IS_ERR_OR_NULL(ocfs2_debugfs_root)) {
+ status = ocfs2_debugfs_root ?
+ PTR_ERR(ocfs2_debugfs_root) : -ENOMEM;
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
goto out4;
}
@@ -2069,6 +2070,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
+ memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+ sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
@@ -2333,7 +2336,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb);
+ cleancache_init_shared_fs(sb);
bail:
return status;
@@ -2563,22 +2566,22 @@ static void ocfs2_handle_error(struct super_block *sb)
ocfs2_set_ro_flag(osb, 0);
}
-static char error_buf[1024];
-
-void __ocfs2_error(struct super_block *sb,
- const char *function,
- const char *fmt, ...)
+void __ocfs2_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
/* Not using mlog here because we want to show the actual
* function the error came from. */
- printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
ocfs2_handle_error(sb);
}
@@ -2586,18 +2589,21 @@ void __ocfs2_error(struct super_block *sb,
/* Handle critical errors. This is intentionally more drastic than
* ocfs2_handle_error, so we only use for things like journal errors,
* etc. */
-void __ocfs2_abort(struct super_block* sb,
- const char *function,
+void __ocfs2_abort(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
- printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
/* We don't have the cluster support yet to go straight to
* hard readonly in here. Until then, we want to keep
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
i,
&block_off,
&name_offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
xs->base = bucket_block(xs->bucket, block_off);
}
if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
i, &xv, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
args->ref_ci,
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
mem_address = pdata->mem_address;
record_size = pdata->record_size;
dump_oops = pdata->dump_oops;
+ ramoops_console_size = pdata->console_size;
+ ramoops_pmsg_size = pdata->pmsg_size;
+ ramoops_ftrace_size = pdata->ftrace_size;
pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
- s->cleancache_poolid = -1;
+ s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (grp->attrs) {
for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
- umode_t mode = 0;
+ umode_t mode = (*attr)->mode;
/*
* In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (!mode)
continue;
}
+
+ WARN(mode & ~(SYSFS_PREALLOC | 0664),
+ "Attribute %s: Invalid permissions 0%o\n",
+ (*attr)->name, mode);
+
+ mode &= SYSFS_PREALLOC | 0664;
error = sysfs_add_file_mode_ns(parent, *attr, false,
- (*attr)->mode | mode,
- NULL);
+ mode, NULL);
if (unlikely(error))
break;
}
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
+tracefs-objs := inode.o
+
+obj-$(CONFIG_TRACING) += tracefs.o
+
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
+/*
+ * inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * tracefs is the file system that is used by the tracing infrastructure.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kobject.h>
+#include <linux/namei.h>
+#include <linux/tracefs.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+
+#define TRACEFS_DEFAULT_MODE 0700
+
+static struct vfsmount *tracefs_mount;
+static int tracefs_mount_count;
+static bool tracefs_registered;
+
+static ssize_t default_read_file(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return 0;
+}
+
+static ssize_t default_write_file(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static const struct file_operations tracefs_file_operations = {
+ .read = default_read_file,
+ .write = default_write_file,
+ .open = simple_open,
+ .llseek = noop_llseek,
+};
+
+static struct tracefs_dir_ops {
+ int (*mkdir)(const char *name);
+ int (*rmdir)(const char *name);
+} tracefs_ops;
+
+static char *get_dname(struct dentry *dentry)
+{
+ const char *dname;
+ char *name;
+ int len = dentry->d_name.len;
+
+ dname = dentry->d_name.name;
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ memcpy(name, dname, len);
+ name[len] = 0;
+ return name;
+}
+
+static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The mkdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * mkdir routine to handle races.
+ */
+ mutex_unlock(&inode->i_mutex);
+ ret = tracefs_ops.mkdir(name);
+ mutex_lock(&inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The rmdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * rmdir routine to handle races.
+ * This time we need to unlock not only the parent (inode) but
+ * also the directory that is being deleted.
+ */
+ mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
+
+ ret = tracefs_ops.rmdir(name);
+
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock(&dentry->d_inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .mkdir = tracefs_syscall_mkdir,
+ .rmdir = tracefs_syscall_rmdir,
+};
+
+static struct inode *tracefs_get_inode(struct super_block *sb)
+{
+ struct inode *inode = new_inode(sb);
+ if (inode) {
+ inode->i_ino = get_next_ino();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ }
+ return inode;
+}
+
+struct tracefs_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_mode,
+ Opt_err
+};
+
+static const match_table_t tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_mode, "mode=%o"},
+ {Opt_err, NULL}
+};
+
+struct tracefs_fs_info {
+ struct tracefs_mount_opts mount_opts;
+};
+
+static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+{
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ int token;
+ kuid_t uid;
+ kgid_t gid;
+ char *p;
+
+ opts->mode = TRACEFS_DEFAULT_MODE;
+
+ while ((p = strsep(&data, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ opts->uid = uid;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ opts->gid = gid;
+ break;
+ case Opt_mode:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->mode = option & S_IALLUGO;
+ break;
+ /*
+ * We might like to report bad mount options here;
+ * but traditionally tracefs has ignored all mount options
+ */
+ }
+ }
+
+ return 0;
+}
+
+static int tracefs_apply_options(struct super_block *sb)
+{
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+ struct inode *inode = sb->s_root->d_inode;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+
+ inode->i_uid = opts->uid;
+ inode->i_gid = opts->gid;
+
+ return 0;
+}
+
+static int tracefs_remount(struct super_block *sb, int *flags, char *data)
+{
+ int err;
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+
+ sync_filesystem(sb);
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ tracefs_apply_options(sb);
+
+fail:
+ return err;
+}
+
+static int tracefs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, opts->uid));
+ if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, opts->gid));
+ if (opts->mode != TRACEFS_DEFAULT_MODE)
+ seq_printf(m, ",mode=%o", opts->mode);
+
+ return 0;
+}
+
+static const struct super_operations tracefs_super_operations = {
+ .statfs = simple_statfs,
+ .remount_fs = tracefs_remount,
+ .show_options = tracefs_show_options,
+};
+
+static int trace_fill_super(struct super_block *sb, void *data, int silent)
+{
+ static struct tree_descr trace_files[] = {{""}};
+ struct tracefs_fs_info *fsi;
+ int err;
+
+ save_mount_options(sb, data);
+
+ fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
+ sb->s_fs_info = fsi;
+ if (!fsi) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
+ if (err)
+ goto fail;
+
+ sb->s_op = &tracefs_super_operations;
+
+ tracefs_apply_options(sb);
+
+ return 0;
+
+fail:
+ kfree(fsi);
+ sb->s_fs_info = NULL;
+ return err;
+}
+
+static struct dentry *trace_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_single(fs_type, flags, data, trace_fill_super);
+}
+
+static struct file_system_type trace_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "tracefs",
+ .mount = trace_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("tracefs");
+
+static struct dentry *start_creating(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+ int error;
+
+ pr_debug("tracefs: creating file '%s'\n",name);
+
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+ /* If the parent is not specified, we create it in the root.
+ * We need the root dentry to do this, which is in the super
+ * block. A pointer to that is in the struct vfsmount that we
+ * have around.
+ */
+ if (!parent)
+ parent = tracefs_mount->mnt_root;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(dentry) && dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+ if (IS_ERR(dentry))
+ mutex_unlock(&parent->d_inode->i_mutex);
+ return dentry;
+}
+
+static struct dentry *failed_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ dput(dentry);
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ return NULL;
+}
+
+static struct dentry *end_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ return dentry;
+}
+
+/**
+ * tracefs_create_file - create a file in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * file will be created in the root of the tracefs filesystem.
+ * @data: a pointer to something that the caller will want to get to later
+ * on. The inode.i_private pointer will point to this value on
+ * the open() call.
+ * @fops: a pointer to a struct file_operations that should be used for
+ * this file.
+ *
+ * This is the basic "create a file" function for tracefs. It allows for a
+ * wide range of flexibility in creating a file, or a directory (if you want
+ * to create a directory, the tracefs_create_dir() function is
+ * recommended to be used instead.)
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+ BUG_ON(!S_ISREG(mode));
+ dentry = start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = mode;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
+ inode->i_private = data;
+ d_instantiate(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+static struct dentry *__create_dir(const char *name, struct dentry *parent,
+ const struct inode_operations *ops)
+{
+ struct dentry *dentry = start_creating(name, parent);
+ struct inode *inode;
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = ops;
+ inode->i_fop = &simple_dir_operations;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+/**
+ * tracefs_create_dir - create a directory in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the directory to
+ * create.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * directory will be created in the root of the tracefs filesystem.
+ *
+ * This function creates a directory in tracefs with the given name.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed. If an error occurs, %NULL will be returned.
+ *
+ * If tracing is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
+{
+ return __create_dir(name, parent, &simple_dir_inode_operations);
+}
+
+/**
+ * tracefs_create_instance_dir - create the tracing instances directory
+ * @name: The name of the instances directory to create
+ * @parent: The parent directory that the instances directory will exist
+ * @mkdir: The function to call when a mkdir is performed.
+ * @rmdir: The function to call when a rmdir is performed.
+ *
+ * Only one instances directory is allowed.
+ *
+ * The instances directory is special as it allows for mkdir and rmdir to
+ * to be done by userspace. When a mkdir or rmdir is performed, the inode
+ * locks are released and the methhods passed in (@mkdir and @rmdir) are
+ * called without locks and with the name of the directory being created
+ * within the instances directory.
+ *
+ * Returns the dentry of the instances directory.
+ */
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+ int (*mkdir)(const char *name),
+ int (*rmdir)(const char *name))
+{
+ struct dentry *dentry;
+
+ /* Only allow one instance of the instances directory. */
+ if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+ return NULL;
+
+ dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+ if (!dentry)
+ return NULL;
+
+ tracefs_ops.mkdir = mkdir;
+ tracefs_ops.rmdir = rmdir;
+
+ return dentry;
+}
+
+static inline int tracefs_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+{
+ int ret = 0;
+
+ if (tracefs_positive(dentry)) {
+ if (dentry->d_inode) {
+ dget(dentry);
+ switch (dentry->d_inode->i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = simple_rmdir(parent->d_inode, dentry);
+ break;
+ default:
+ simple_unlink(parent->d_inode, dentry);
+ break;
+ }
+ if (!ret)
+ d_delete(dentry);
+ dput(dentry);
+ }
+ }
+ return ret;
+}
+
+/**
+ * tracefs_remove - removes a file or directory from the tracefs filesystem
+ * @dentry: a pointer to a the dentry of the file or directory to be
+ * removed.
+ *
+ * This function removes a file or directory in tracefs that was previously
+ * created with a call to another tracefs function (like
+ * tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove(struct dentry *dentry)
+{
+ struct dentry *parent;
+ int ret;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ ret = __tracefs_remove(dentry, parent);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (!ret)
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+}
+
+/**
+ * tracefs_remove_recursive - recursively removes a directory
+ * @dentry: a pointer to a the dentry of the directory to be removed.
+ *
+ * This function recursively removes a directory tree in tracefs that
+ * was previously created with a call to another tracefs function
+ * (like tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove_recursive(struct dentry *dentry)
+{
+ struct dentry *child, *parent;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ parent = dentry;
+ down:
+ mutex_lock(&parent->d_inode->i_mutex);
+ loop:
+ /*
+ * The parent->d_subdirs is protected by the d_lock. Outside that
+ * lock, the child can be unlinked and set to be freed which can
+ * use the d_u.d_child as the rcu head and corrupt this list.
+ */
+ spin_lock(&parent->d_lock);
+ list_for_each_entry(child, &parent->d_subdirs, d_child) {
+ if (!tracefs_positive(child))
+ continue;
+
+ /* perhaps simple_empty(child) makes more sense */
+ if (!list_empty(&child->d_subdirs)) {
+ spin_unlock(&parent->d_lock);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ parent = child;
+ goto down;
+ }
+
+ spin_unlock(&parent->d_lock);
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+ /*
+ * The parent->d_lock protects agaist child from unlinking
+ * from d_subdirs. When releasing the parent->d_lock we can
+ * no longer trust that the next pointer is valid.
+ * Restart the loop. We'll skip this one with the
+ * tracefs_positive() check.
+ */
+ goto loop;
+ }
+ spin_unlock(&parent->d_lock);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+ child = parent;
+ parent = parent->d_parent;
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ if (child != dentry)
+ /* go up */
+ goto loop;
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ mutex_unlock(&parent->d_inode->i_mutex);
+}
+
+/**
+ * tracefs_initialized - Tells whether tracefs has been registered
+ */
+bool tracefs_initialized(void)
+{
+ return tracefs_registered;
+}
+
+static struct kobject *trace_kobj;
+
+static int __init tracefs_init(void)
+{
+ int retval;
+
+ trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
+ if (!trace_kobj)
+ return -EINVAL;
+
+ retval = register_filesystem(&trace_fs_type);
+ if (!retval)
+ tracefs_registered = true;
+
+ return retval;
+}
+core_initcall(tracefs_init);