aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/aio.c21
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/btrfs/compression.c33
-rw-r--r--fs/btrfs/compression.h4
-rw-r--r--fs/btrfs/ctree.c14
-rw-r--r--fs/btrfs/file-item.c2
-rw-r--r--fs/btrfs/locking.c24
-rw-r--r--fs/btrfs/locking.h2
-rw-r--r--fs/btrfs/lzo.c15
-rw-r--r--fs/btrfs/zlib.c20
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/cifs/cifs_debug.c77
-rw-r--r--fs/cifs/cifs_debug.h7
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/connect.c51
-rw-r--r--fs/cifs/misc.c32
-rw-r--r--fs/cifs/readdir.c4
-rw-r--r--fs/cifs/sess.c5
-rw-r--r--fs/cifs/smb2ops.c63
-rw-r--r--fs/cifs/smb2pdu.c3
-rw-r--r--fs/cifs/smb2pdu.h19
-rw-r--r--fs/cifs/transport.c4
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/efivarfs/super.c11
-rw-r--r--fs/exec.c1
-rw-r--r--fs/f2fs/acl.c148
-rw-r--r--fs/f2fs/acl.h5
-rw-r--r--fs/f2fs/checkpoint.c186
-rw-r--r--fs/f2fs/data.c166
-rw-r--r--fs/f2fs/debug.c15
-rw-r--r--fs/f2fs/dir.c308
-rw-r--r--fs/f2fs/f2fs.h176
-rw-r--r--fs/f2fs/file.c212
-rw-r--r--fs/f2fs/gc.c89
-rw-r--r--fs/f2fs/gc.h5
-rw-r--r--fs/f2fs/inline.c482
-rw-r--r--fs/f2fs/inode.c44
-rw-r--r--fs/f2fs/namei.c58
-rw-r--r--fs/f2fs/node.c163
-rw-r--r--fs/f2fs/node.h8
-rw-r--r--fs/f2fs/recovery.c14
-rw-r--r--fs/f2fs/segment.c122
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c29
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/f2fs/xattr.h6
-rw-r--r--fs/fat/namei_vfat.c20
-rw-r--r--fs/gfs2/dir.c39
-rw-r--r--fs/gfs2/file.c83
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c26
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h19
-rw-r--r--fs/gfs2/inode.c72
-rw-r--r--fs/gfs2/log.c42
-rw-r--r--fs/gfs2/main.c11
-rw-r--r--fs/gfs2/ops_fstype.c18
-rw-r--r--fs/gfs2/quota.c9
-rw-r--r--fs/gfs2/rgrp.c69
-rw-r--r--fs/gfs2/rgrp.h1
-rw-r--r--fs/gfs2/super.c112
-rw-r--r--fs/gfs2/super.h1
-rw-r--r--fs/gfs2/trans.c17
-rw-r--r--fs/ioctl.c6
-rw-r--r--fs/isofs/inode.c42
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c14
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/delegation.c25
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/direct.c1
-rw-r--r--fs/nfs/filelayout/filelayout.c3
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c3
-rw-r--r--fs/nfs/fscache.c24
-rw-r--r--fs/nfs/inode.c11
-rw-r--r--fs/nfs/iostat.h5
-rw-r--r--fs/nfs/netns.h1
-rw-r--r--fs/nfs/nfs42.h2
-rw-r--r--fs/nfs/nfs42proc.c77
-rw-r--r--fs/nfs/nfs42xdr.c139
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4client.c46
-rw-r--r--fs/nfs/nfs4file.c31
-rw-r--r--fs/nfs/nfs4proc.c107
-rw-r--r--fs/nfs/nfs4xdr.c12
-rw-r--r--fs/nfs/pagelist.c11
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/write.c21
-rw-r--r--fs/nfsd/nfs4callback.c8
-rw-r--r--fs/nfsd/nfsd.h9
-rw-r--r--fs/notify/fsnotify.c36
-rw-r--r--fs/notify/fsnotify.h4
-rw-r--r--fs/notify/inode_mark.c8
-rw-r--r--fs/notify/inotify/inotify_user.c9
-rw-r--r--fs/notify/mark.c36
-rw-r--r--fs/notify/vfsmount_mark.c8
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/overlayfs/Kconfig2
-rw-r--r--fs/overlayfs/Makefile4
-rw-r--r--fs/overlayfs/dir.c31
-rw-r--r--fs/overlayfs/inode.c27
-rw-r--r--fs/overlayfs/readdir.c41
-rw-r--r--fs/overlayfs/super.c61
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/pstore/inode.c22
-rw-r--r--fs/pstore/ram.c22
-rw-r--r--fs/xfs/xfs_bmap_util.c72
-rw-r--r--fs/xfs/xfs_itable.c250
-rw-r--r--fs/xfs/xfs_itable.h16
113 files changed, 2947 insertions, 1546 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 34a1b9dea6dd..da0bbb456d3f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
-obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/
+obj-$(CONFIG_OVERLAY_FS) += overlayfs/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/aio.c b/fs/aio.c
index 84a751005f5b..14b93159ef83 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt;
static const struct file_operations aio_ring_fops;
static const struct address_space_operations aio_ctx_aops;
+/* Backing dev info for aio fs.
+ * -no dirty page accounting or writeback happens
+ */
+static struct backing_dev_info aio_fs_backing_dev_info = {
+ .name = "aiofs",
+ .state = 0,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY,
+};
+
static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
{
struct qstr this = QSTR_INIT("[aio]", 5);
@@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
inode->i_mapping->a_ops = &aio_ctx_aops;
inode->i_mapping->private_data = ctx;
+ inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info;
inode->i_size = PAGE_SIZE * nr_pages;
path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
@@ -220,6 +230,9 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
+ if (bdi_init(&aio_fs_backing_dev_info))
+ panic("Failed to init aio fs backing dev info.");
+
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
@@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = {
.mmap = aio_ring_mmap,
};
-static int aio_set_page_dirty(struct page *page)
-{
- return 0;
-}
-
#if IS_ENABLED(CONFIG_MIGRATION)
static int aio_migratepage(struct address_space *mapping, struct page *new,
struct page *old, enum migrate_mode mode)
@@ -357,7 +365,7 @@ out:
#endif
static const struct address_space_operations aio_ctx_aops = {
- .set_page_dirty = aio_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
#if IS_ENABLED(CONFIG_MIGRATION)
.migratepage = aio_migratepage,
#endif
@@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx)
pr_debug("pid(%d) page[%d]->count=%d\n",
current->pid, i, page_count(page));
SetPageUptodate(page);
- SetPageDirty(page);
unlock_page(page);
ctx->ring_pages[i] = page;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1d9c9f3754f8..b48c41bf0f86 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev)
sb = get_active_super(bdev);
if (!sb)
goto out;
- error = freeze_super(sb);
+ if (sb->s_op->freeze_super)
+ error = sb->s_op->freeze_super(sb);
+ else
+ error = freeze_super(sb);
if (error) {
deactivate_super(sb);
bdev->bd_fsfreeze_count--;
@@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
if (!sb)
goto out;
- error = thaw_super(sb);
+ if (sb->s_op->thaw_super)
+ error = sb->s_op->thaw_super(sb);
+ else
+ error = thaw_super(sb);
if (error) {
bdev->bd_fsfreeze_count++;
mutex_unlock(&bdev->bd_fsfreeze_mutex);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3220d31d3cb..dcd9be32ac57 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
- if (*pg_index == (vcnt - 1) && *pg_offset == 0)
- memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
@@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
return 1;
}
+
+/*
+ * When uncompressing data, we need to make sure and zero any parts of
+ * the biovec that were not filled in by the decompression code. pg_index
+ * and pg_offset indicate the last page and the last offset of that page
+ * that have been filled in. This will zero everything remaining in the
+ * biovec.
+ */
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+ unsigned long pg_index,
+ unsigned long pg_offset)
+{
+ while (pg_index < vcnt) {
+ struct page *page = bvec[pg_index].bv_page;
+ unsigned long off = bvec[pg_index].bv_offset;
+ unsigned long len = bvec[pg_index].bv_len;
+
+ if (pg_offset < off)
+ pg_offset = off;
+ if (pg_offset < off + len) {
+ unsigned long bytes = off + len - pg_offset;
+ char *kaddr;
+
+ kaddr = kmap_atomic(page);
+ memset(kaddr + pg_offset, 0, bytes);
+ kunmap_atomic(kaddr);
+ }
+ pg_index++;
+ pg_offset = 0;
+ }
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0c803b4fbf93..d181f70caae0 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
-
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+ unsigned long pg_index,
+ unsigned long pg_offset);
struct btrfs_compress_op {
struct list_head *(*alloc_workspace)(void);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc6162fb8e..150822ee0a0b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
{
int i;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /* lockdep really cares that we take all of these spinlocks
- * in the right order. If any of the locks in the path are not
- * currently blocking, it is going to complain. So, make really
- * really sure by forcing the path to blocking before we clear
- * the path blocking.
- */
if (held) {
btrfs_set_lock_blocking_rw(held, held_rw);
if (held_rw == BTRFS_WRITE_LOCK)
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
held_rw = BTRFS_READ_LOCK_BLOCKING;
}
btrfs_set_path_blocking(p);
-#endif
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
if (p->nodes[i] && p->locks[i]) {
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
}
}
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (held)
btrfs_clear_lock_blocking_rw(held, held_rw);
-#endif
}
/* this also releases the path */
@@ -2893,7 +2883,7 @@ cow_done:
}
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
- err = btrfs_try_tree_read_lock(b);
+ err = btrfs_tree_read_lock_atomic(b);
if (!err) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
@@ -3025,7 +3015,7 @@ again:
}
level = btrfs_header_level(b);
- err = btrfs_try_tree_read_lock(b);
+ err = btrfs_tree_read_lock_atomic(b);
if (!err) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 783a94355efd..84a2d1868271 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
ret = 0;
fail:
while (ret < 0 && !list_empty(&tmplist)) {
- sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+ sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d2149249..f8229ef1b46d 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -128,6 +128,26 @@ again:
}
/*
+ * take a spinning read lock.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
+ */
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
+{
+ if (atomic_read(&eb->blocking_writers))
+ return 0;
+
+ read_lock(&eb->lock);
+ if (atomic_read(&eb->blocking_writers)) {
+ read_unlock(&eb->lock);
+ return 0;
+ }
+ atomic_inc(&eb->read_locks);
+ atomic_inc(&eb->spinning_readers);
+ return 1;
+}
+
+/*
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
*/
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
atomic_read(&eb->blocking_readers))
return 0;
- if (!write_trylock(&eb->lock))
- return 0;
-
+ write_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9a4894..c44a9d5f5362 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
+
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 78285f30909e..617553cdb7d3 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -373,6 +373,8 @@ cont:
}
done:
kunmap(pages_in[page_in_index]);
+ if (!ret)
+ btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
@@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
goto out;
}
+ /*
+ * the caller is already checking against PAGE_SIZE, but lets
+ * move this check closer to the memcpy/memset
+ */
+ destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
+
+ /*
+ * btrfs_getblock is doing a zero on the tail of the page too,
+ * but this will cover anything missing from the decompressed
+ * data.
+ */
+ if (bytes < destlen)
+ memset(kaddr+bytes, 0, destlen-bytes);
kunmap_atomic(kaddr);
out:
return ret;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 759fa4e2de8f..fb22fd8d8fb8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -299,6 +299,8 @@ done:
zlib_inflateEnd(&workspace->strm);
if (data_in)
kunmap(pages_in[page_in_index]);
+ if (!ret)
+ btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
return ret;
}
@@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
int wbits = MAX_WBITS;
- unsigned long bytes_left = destlen;
+ unsigned long bytes_left;
unsigned long total_out = 0;
+ unsigned long pg_offset = 0;
char *kaddr;
+ destlen = min_t(unsigned long, destlen, PAGE_SIZE);
+ bytes_left = destlen;
+
workspace->strm.next_in = data_in;
workspace->strm.avail_in = srclen;
workspace->strm.total_in = 0;
@@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
- unsigned long pg_offset = 0;
ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
if (ret != Z_OK && ret != Z_STREAM_END)
@@ -384,6 +389,17 @@ next:
ret = 0;
zlib_inflateEnd(&workspace->strm);
+
+ /*
+ * this should only happen if zlib returned fewer bytes than we
+ * expected. btrfs_get_block is responsible for zeroing from the
+ * end of the inline extent (destlen) to the end of the page
+ */
+ if (pg_offset < destlen) {
+ kaddr = kmap_atomic(dest_page);
+ memset(kaddr + pg_offset, 0, destlen - pg_offset);
+ kunmap_atomic(kaddr);
+ }
return ret;
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 659f2ea9e6f7..cefca661464b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
for (i = 0; i < CEPH_CAP_BITS; i++)
if ((dirty & (1 << i)) &&
- flush_tid == ci->i_cap_flush_tid[i])
+ (u16)flush_tid == ci->i_cap_flush_tid[i])
cleaned |= 1 << i;
dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 44ec72684df5..9c56ef776407 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -34,27 +34,9 @@
void
cifs_dump_mem(char *label, void *data, int length)
{
- int i, j;
- int *intptr = data;
- char *charptr = data;
- char buf[10], line[80];
-
- printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
- label, length, data);
- for (i = 0; i < length; i += 16) {
- line[0] = 0;
- for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
- sprintf(buf, " %08x", intptr[i / 4 + j]);
- strcat(line, buf);
- }
- buf[0] = ' ';
- buf[2] = 0;
- for (j = 0; (j < 16) && (i + j < length); j++) {
- buf[1] = isprint(charptr[i + j]) ? charptr[i + j] : '.';
- strcat(line, buf);
- }
- printk(KERN_DEBUG "%s\n", line);
- }
+ pr_debug("%s: dump of %d bytes of data at 0x%p\n", label, length, data);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 16, 4,
+ data, length, true);
}
#ifdef CONFIG_CIFS_DEBUG
@@ -68,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_ERR "CIFS VFS: %pV", &vaf);
+ pr_err("CIFS VFS: %pV", &vaf);
va_end(args);
}
@@ -274,6 +256,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
struct list_head *tmp1, *tmp2, *tmp3;
struct TCP_Server_Info *server;
@@ -284,7 +267,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
if (rc)
return rc;
- if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+ if (strtobool(&c, &bv) == 0) {
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
@@ -451,15 +434,14 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- cifsFYI = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- cifsFYI = 1;
+ if (strtobool(&c, &bv) == 0)
+ cifsFYI = bv;
else if ((c > '1') && (c <= '9'))
cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
@@ -490,15 +472,18 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- linuxExtEnabled = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- linuxExtEnabled = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ linuxExtEnabled = bv;
return count;
}
@@ -527,15 +512,18 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- lookupCacheEnabled = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- lookupCacheEnabled = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ lookupCacheEnabled = bv;
return count;
}
@@ -564,15 +552,18 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
char c;
+ bool bv;
int rc;
rc = get_user(c, buffer);
if (rc)
return rc;
- if (c == '0' || c == 'n' || c == 'N')
- traceSMB = 0;
- else if (c == '1' || c == 'y' || c == 'Y')
- traceSMB = 1;
+
+ rc = strtobool(&c, &bv);
+ if (rc)
+ return rc;
+
+ traceSMB = bv;
return count;
}
@@ -630,6 +621,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
unsigned int flags;
char flags_string[12];
char c;
+ bool bv;
if ((count < 1) || (count > 11))
return -EINVAL;
@@ -642,11 +634,8 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if (count < 3) {
/* single char or single char followed by null */
c = flags_string[0];
- if (c == '0' || c == 'n' || c == 'N') {
- global_secflags = CIFSSEC_DEF; /* default */
- return count;
- } else if (c == '1' || c == 'y' || c == 'Y') {
- global_secflags = CIFSSEC_MAX;
+ if (strtobool(&c, &bv) == 0) {
+ global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF;
return count;
} else if (!isdigit(c)) {
cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index c99b40fb609b..f40fbaca1b2a 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -53,13 +53,12 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
do { \
if (type == FYI) { \
if (cifsFYI & CIFS_INFO) { \
- printk(KERN_DEBUG "%s: " fmt, \
- __FILE__, ##__VA_ARGS__); \
+ pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \
} \
} else if (type == VFS) { \
cifs_vfs_err(fmt, ##__VA_ARGS__); \
} else if (type == NOISY && type != 0) { \
- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ pr_debug(fmt, ##__VA_ARGS__); \
} \
} while (0)
@@ -71,7 +70,7 @@ do { \
#define cifs_dbg(type, fmt, ...) \
do { \
if (0) \
- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+ pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
#endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 002e0c173939..252f5c15806b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.05"
+#define CIFS_VERSION "2.06"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 24fa08d261fb..2a772da16b83 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1466,9 +1466,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->seal = 1;
break;
case Opt_noac:
- printk(KERN_WARNING "CIFS: Mount option noac not "
- "supported. Instead set "
- "/proc/fs/cifs/LookupCacheEnabled to 0\n");
+ pr_warn("CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
break;
case Opt_fsc:
#ifndef CONFIG_CIFS_FSCACHE
@@ -1598,7 +1596,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
CIFS_MAX_USERNAME_LEN) {
- printk(KERN_WARNING "CIFS: username too long\n");
+ pr_warn("CIFS: username too long\n");
goto cifs_parse_mount_err;
}
vol->username = kstrdup(string, GFP_KERNEL);
@@ -1662,8 +1660,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
temp_len = strlen(value);
vol->password = kzalloc(temp_len+1, GFP_KERNEL);
if (vol->password == NULL) {
- printk(KERN_WARNING "CIFS: no memory "
- "for password\n");
+ pr_warn("CIFS: no memory for password\n");
goto cifs_parse_mount_err;
}
@@ -1687,8 +1684,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (!cifs_convert_address(dstaddr, string,
strlen(string))) {
- printk(KERN_ERR "CIFS: bad ip= option (%s).\n",
- string);
+ pr_err("CIFS: bad ip= option (%s).\n", string);
goto cifs_parse_mount_err;
}
got_ip = true;
@@ -1700,15 +1696,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
== CIFS_MAX_DOMAINNAME_LEN) {
- printk(KERN_WARNING "CIFS: domain name too"
- " long\n");
+ pr_warn("CIFS: domain name too long\n");
goto cifs_parse_mount_err;
}
vol->domainname = kstrdup(string, GFP_KERNEL);
if (!vol->domainname) {
- printk(KERN_WARNING "CIFS: no memory "
- "for domainname\n");
+ pr_warn("CIFS: no memory for domainname\n");
goto cifs_parse_mount_err;
}
cifs_dbg(FYI, "Domain name set\n");
@@ -1721,8 +1715,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (!cifs_convert_address(
(struct sockaddr *)&vol->srcaddr,
string, strlen(string))) {
- printk(KERN_WARNING "CIFS: Could not parse"
- " srcaddr: %s\n", string);
+ pr_warn("CIFS: Could not parse srcaddr: %s\n",
+ string);
goto cifs_parse_mount_err;
}
break;
@@ -1732,8 +1726,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto out_nomem;
if (strnlen(string, 1024) >= 65) {
- printk(KERN_WARNING "CIFS: iocharset name "
- "too long.\n");
+ pr_warn("CIFS: iocharset name too long.\n");
goto cifs_parse_mount_err;
}
@@ -1741,8 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
vol->iocharset = kstrdup(string,
GFP_KERNEL);
if (!vol->iocharset) {
- printk(KERN_WARNING "CIFS: no memory"
- "for charset\n");
+ pr_warn("CIFS: no memory for charset\n");
goto cifs_parse_mount_err;
}
}
@@ -1773,9 +1765,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
* set at top of the function
*/
if (i == RFC1001_NAME_LEN && string[i] != 0)
- printk(KERN_WARNING "CIFS: netbiosname"
- " longer than 15 truncated.\n");
-
+ pr_warn("CIFS: netbiosname longer than 15 truncated.\n");
break;
case Opt_servern:
/* servernetbiosname specified override *SMBSERVER */
@@ -1801,8 +1791,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
/* The string has 16th byte zero still from
set at top of the function */
if (i == RFC1001_NAME_LEN && string[i] != 0)
- printk(KERN_WARNING "CIFS: server net"
- "biosname longer than 15 truncated.\n");
+ pr_warn("CIFS: server netbiosname longer than 15 truncated.\n");
break;
case Opt_ver:
string = match_strdup(args);
@@ -1814,8 +1803,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
break;
}
/* For all other value, error */
- printk(KERN_WARNING "CIFS: Invalid version"
- " specified\n");
+ pr_warn("CIFS: Invalid version specified\n");
goto cifs_parse_mount_err;
case Opt_vers:
string = match_strdup(args);
@@ -1856,7 +1844,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
if (!sloppy && invalid) {
- printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid);
+ pr_err("CIFS: Unknown mount option \"%s\"\n", invalid);
goto cifs_parse_mount_err;
}
@@ -1882,8 +1870,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
/* No ip= option specified? Try to get it from UNC */
if (!cifs_convert_address(dstaddr, &vol->UNC[2],
strlen(&vol->UNC[2]))) {
- printk(KERN_ERR "Unable to determine destination "
- "address.\n");
+ pr_err("Unable to determine destination address.\n");
goto cifs_parse_mount_err;
}
}
@@ -1894,20 +1881,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (uid_specified)
vol->override_uid = override_uid;
else if (override_uid == 1)
- printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
- "specified with no uid= option.\n");
+ pr_notice("CIFS: ignoring forceuid mount option specified with no uid= option.\n");
if (gid_specified)
vol->override_gid = override_gid;
else if (override_gid == 1)
- printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
- "specified with no gid= option.\n");
+ pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n");
kfree(mountdata_copy);
return 0;
out_nomem:
- printk(KERN_WARNING "Could not allocate temporary buffer\n");
+ pr_warn("Could not allocate temporary buffer\n");
cifs_parse_mount_err:
kfree(string);
kfree(mountdata_copy);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b7415d596dbd..337946355b29 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -513,39 +513,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
void
dump_smb(void *buf, int smb_buf_length)
{
- int i, j;
- char debug_line[17];
- unsigned char *buffer = buf;
-
if (traceSMB == 0)
return;
- for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
- if (i % 8 == 0) {
- /* have reached the beginning of line */
- printk(KERN_DEBUG "| ");
- j = 0;
- }
- printk("%0#4x ", buffer[i]);
- debug_line[2 * j] = ' ';
- if (isprint(buffer[i]))
- debug_line[1 + (2 * j)] = buffer[i];
- else
- debug_line[1 + (2 * j)] = '_';
-
- if (i % 8 == 7) {
- /* reached end of line, time to print ascii */
- debug_line[16] = 0;
- printk(" | %s\n", debug_line);
- }
- }
- for (; j < 8; j++) {
- printk(" ");
- debug_line[2 * j] = ' ';
- debug_line[1 + (2 * j)] = ' ';
- }
- printk(" | %s\n", debug_line);
- return;
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 8, 2, buf,
+ smb_buf_length, true);
}
void
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8fd2a95860ba..803030c9ab68 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -794,10 +794,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
if it before then restart search
if after then keep searching till find it */
- if (file->private_data == NULL) {
- rc = -EINVAL;
- goto rddir2_exit;
- }
cifsFile = file->private_data;
if (cifsFile->srch_inf.endOfSearch) {
if (cifsFile->srch_inf.emptyDir) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 57db63ff88da..446cb7fb3f58 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -1303,6 +1303,11 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
+ if (ses->Suid != smb_buf->Uid) {
+ ses->Suid = smb_buf->Uid;
+ cifs_dbg(FYI, "UID changed! new UID = %llu\n", ses->Suid);
+ }
+
bytes_remaining = get_bcc(smb_buf);
bcc_ptr = pByteArea(smb_buf);
blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5f521bcdee2..568f323665c8 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1102,6 +1102,64 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
+ loff_t off, loff_t len, bool keep_size)
+{
+ struct inode *inode;
+ struct cifsInodeInfo *cifsi;
+ struct cifsFileInfo *cfile = file->private_data;
+ long rc = -EOPNOTSUPP;
+ unsigned int xid;
+
+ xid = get_xid();
+
+ inode = cfile->dentry->d_inode;
+ cifsi = CIFS_I(inode);
+
+ /* if file not oplocked can't be sure whether asking to extend size */
+ if (!CIFS_CACHE_READ(cifsi))
+ if (keep_size == false)
+ return -EOPNOTSUPP;
+
+ /*
+ * Files are non-sparse by default so falloc may be a no-op
+ * Must check if file sparse. If not sparse, and not extending
+ * then no need to do anything since file already allocated
+ */
+ if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
+ if (keep_size == true)
+ return 0;
+ /* check if extending file */
+ else if (i_size_read(inode) >= off + len)
+ /* not extending file and already not sparse */
+ return 0;
+ /* BB: in future add else clause to extend file */
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
+ /*
+ * Check if falloc starts within first few pages of file
+ * and ends within a few pages of the end of file to
+ * ensure that most of file is being forced to be
+ * fallocated now. If so then setting whole file sparse
+ * ie potentially making a few extra pages at the beginning
+ * or end of the file non-sparse via set_sparse is harmless.
+ */
+ if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
+ return -EOPNOTSUPP;
+
+ rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
+ }
+ /* BB: else ... in future add code to extend file and set sparse */
+
+
+ free_xid(xid);
+ return rc;
+}
+
+
static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
loff_t off, loff_t len)
{
@@ -1112,7 +1170,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
if (mode & FALLOC_FL_KEEP_SIZE)
return smb3_zero_range(file, tcon, off, len, true);
return smb3_zero_range(file, tcon, off, len, false);
- }
+ } else if (mode == FALLOC_FL_KEEP_SIZE)
+ return smb3_simple_falloc(file, tcon, off, len, true);
+ else if (mode == 0)
+ return smb3_simple_falloc(file, tcon, off, len, false);
return -EOPNOTSUPP;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f1672bb82d5..0ca7f6364754 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -431,8 +431,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
if (rc)
goto neg_exit;
if (blob_length)
- rc = decode_neg_token_init(security_blob, blob_length,
- &server->sec_type);
+ rc = decode_negTokenInit(security_blob, blob_length, server);
if (rc == 1)
rc = 0;
else if (rc == 0) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e3188abdafd0..d84f46c5b2c5 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -836,6 +836,25 @@ struct smb2_query_directory_rsp {
#define SMB2_O_INFO_SECURITY 0x03
#define SMB2_O_INFO_QUOTA 0x04
+/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */
+#define OWNER_SECINFO 0x00000001
+#define GROUP_SECINFO 0x00000002
+#define DACL_SECINFO 0x00000004
+#define SACL_SECINFO 0x00000008
+#define LABEL_SECINFO 0x00000010
+#define ATTRIBUTE_SECINFO 0x00000020
+#define SCOPE_SECINFO 0x00000040
+#define BACKUP_SECINFO 0x00010000
+#define UNPROTECTED_SACL_SECINFO 0x10000000
+#define UNPROTECTED_DACL_SECINFO 0x20000000
+#define PROTECTED_SACL_SECINFO 0x40000000
+#define PROTECTED_DACL_SECINFO 0x80000000
+
+/* Flags used for FileFullEAinfo */
+#define SL_RESTART_SCAN 0x00000001
+#define SL_RETURN_SINGLE_ENTRY 0x00000002
+#define SL_INDEX_SPECIFIED 0x00000004
+
struct smb2_query_info_req {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 41 */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 9d087f4e7d4e..126f46b887cc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -99,9 +99,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
something is wrong, unless it is quite a slow link or server */
if ((now - midEntry->when_alloc) > HZ) {
if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
- printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu",
+ pr_debug(" CIFS slow rsp: cmd %d mid %llu",
midEntry->command, midEntry->mid);
- printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+ pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
now - midEntry->when_alloc,
now - midEntry->when_sent,
now - midEntry->when_received);
diff --git a/fs/dcache.c b/fs/dcache.c
index 3ffef7f4e5cd..5bc72b07fde2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -778,6 +778,7 @@ restart:
struct dentry *parent = lock_parent(dentry);
if (likely(!dentry->d_lockref.count)) {
__dentry_kill(dentry);
+ dput(parent);
goto restart;
}
if (parent)
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0a48886e069c..6dad1176ec52 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb)
}
static struct file_system_type efivarfs_type = {
+ .owner = THIS_MODULE,
.name = "efivarfs",
.mount = efivarfs_mount,
.kill_sb = efivarfs_kill_sb,
@@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = {
static __init int efivarfs_init(void)
{
if (!efi_enabled(EFI_RUNTIME_SERVICES))
- return 0;
+ return -ENODEV;
if (!efivars_kobject())
- return 0;
+ return -ENODEV;
return register_filesystem(&efivarfs_type);
}
+static __exit void efivarfs_exit(void)
+{
+ unregister_filesystem(&efivarfs_type);
+}
+
MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr");
MODULE_DESCRIPTION("EFI Variable Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS("efivarfs");
module_init(efivarfs_init);
+module_exit(efivarfs_exit);
diff --git a/fs/exec.c b/fs/exec.c
index 7302b75a9820..01aebe300200 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
goto err;
mm->stack_vm = mm->total_vm = 1;
+ arch_bprm_mm_init(mm, vma);
up_write(&mm->mmap_sem);
bprm->p = vma->vm_end - sizeof(void *);
return 0;
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 83b9b5a8d112..1ccb26bc2a0b 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -162,7 +162,8 @@ fail:
return ERR_PTR(-EINVAL);
}
-struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
+ struct page *dpage)
{
int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
void *value = NULL;
@@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
if (type == ACL_TYPE_ACCESS)
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
- retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
+ retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
if (retval > 0) {
value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
- retval = f2fs_getxattr(inode, name_index, "", value, retval);
+ retval = f2fs_getxattr(inode, name_index, "", value,
+ retval, dpage);
}
if (retval > 0)
@@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
return acl;
}
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+{
+ return __f2fs_get_acl(inode, type, NULL);
+}
+
static int __f2fs_set_acl(struct inode *inode, int type,
struct posix_acl *acl, struct page *ipage)
{
@@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
if (acl) {
value = f2fs_acl_to_disk(acl, &size);
if (IS_ERR(value)) {
- cond_clear_inode_flag(fi, FI_ACL_MODE);
+ clear_inode_flag(fi, FI_ACL_MODE);
return (int)PTR_ERR(value);
}
}
@@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
if (!error)
set_cached_acl(inode, type, acl);
- cond_clear_inode_flag(fi, FI_ACL_MODE);
+ clear_inode_flag(fi, FI_ACL_MODE);
return error;
}
@@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return __f2fs_set_acl(inode, type, acl, NULL);
}
-int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
+/*
+ * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create
+ * are copied from posix_acl.c
+ */
+static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl,
+ gfp_t flags)
+{
+ struct posix_acl *clone = NULL;
+
+ if (acl) {
+ int size = sizeof(struct posix_acl) + acl->a_count *
+ sizeof(struct posix_acl_entry);
+ clone = kmemdup(acl, size, flags);
+ if (clone)
+ atomic_set(&clone->a_refcount, 1);
+ }
+ return clone;
+}
+
+static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
+{
+ struct posix_acl_entry *pa, *pe;
+ struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
+ umode_t mode = *mode_p;
+ int not_equiv = 0;
+
+ /* assert(atomic_read(acl->a_refcount) == 1); */
+
+ FOREACH_ACL_ENTRY(pa, acl, pe) {
+ switch(pa->e_tag) {
+ case ACL_USER_OBJ:
+ pa->e_perm &= (mode >> 6) | ~S_IRWXO;
+ mode &= (pa->e_perm << 6) | ~S_IRWXU;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ not_equiv = 1;
+ break;
+
+ case ACL_GROUP_OBJ:
+ group_obj = pa;
+ break;
+
+ case ACL_OTHER:
+ pa->e_perm &= mode | ~S_IRWXO;
+ mode &= pa->e_perm | ~S_IRWXO;
+ break;
+
+ case ACL_MASK:
+ mask_obj = pa;
+ not_equiv = 1;
+ break;
+
+ default:
+ return -EIO;
+ }
+ }
+
+ if (mask_obj) {
+ mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+ mode &= (mask_obj->e_perm << 3) | ~S_IRWXG;
+ } else {
+ if (!group_obj)
+ return -EIO;
+ group_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+ mode &= (group_obj->e_perm << 3) | ~S_IRWXG;
+ }
+
+ *mode_p = (*mode_p & ~S_IRWXUGO) | mode;
+ return not_equiv;
+}
+
+static int f2fs_acl_create(struct inode *dir, umode_t *mode,
+ struct posix_acl **default_acl, struct posix_acl **acl,
+ struct page *dpage)
+{
+ struct posix_acl *p;
+ int ret;
+
+ if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
+ goto no_acl;
+
+ p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage);
+ if (IS_ERR(p)) {
+ if (p == ERR_PTR(-EOPNOTSUPP))
+ goto apply_umask;
+ return PTR_ERR(p);
+ }
+
+ if (!p)
+ goto apply_umask;
+
+ *acl = f2fs_acl_clone(p, GFP_NOFS);
+ if (!*acl)
+ return -ENOMEM;
+
+ ret = f2fs_acl_create_masq(*acl, mode);
+ if (ret < 0) {
+ posix_acl_release(*acl);
+ return -ENOMEM;
+ }
+
+ if (ret == 0) {
+ posix_acl_release(*acl);
+ *acl = NULL;
+ }
+
+ if (!S_ISDIR(*mode)) {
+ posix_acl_release(p);
+ *default_acl = NULL;
+ } else {
+ *default_acl = p;
+ }
+ return 0;
+
+apply_umask:
+ *mode &= ~current_umask();
+no_acl:
+ *default_acl = NULL;
+ *acl = NULL;
+ return 0;
+}
+
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
+ struct page *dpage)
{
- struct posix_acl *default_acl, *acl;
+ struct posix_acl *default_acl = NULL, *acl = NULL;
int error = 0;
- error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage);
if (error)
return error;
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index e0864651cdc1..997ca8edb6cb 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -38,14 +38,15 @@ struct f2fs_acl_header {
extern struct posix_acl *f2fs_get_acl(struct inode *, int);
extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
+ struct page *);
#else
#define f2fs_check_acl NULL
#define f2fs_get_acl NULL
#define f2fs_set_acl NULL
static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
- struct page *page)
+ struct page *ipage, struct page *dpage)
{
return 0;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd10a031c052..e6c271fefaca 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,36 +72,36 @@ out:
return page;
}
-struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
-{
- bool readahead = false;
- struct page *page;
-
- page = find_get_page(META_MAPPING(sbi), index);
- if (!page || (page && !PageUptodate(page)))
- readahead = true;
- f2fs_put_page(page, 0);
-
- if (readahead)
- ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
- return get_meta_page(sbi, index);
-}
-
-static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
{
switch (type) {
case META_NAT:
- return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+ break;
case META_SIT:
- return SIT_BLK_CNT(sbi);
+ if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
+ return false;
+ break;
case META_SSA:
+ if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
+ blkaddr < SM_I(sbi)->ssa_blkaddr))
+ return false;
+ break;
case META_CP:
- return 0;
+ if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
+ blkaddr < __start_cp_addr(sbi)))
+ return false;
+ break;
case META_POR:
- return MAX_BLKADDR(sbi);
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi)))
+ return false;
+ break;
default:
BUG();
}
+
+ return true;
}
/*
@@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
- block_t max_blks = get_max_meta_blks(sbi, type);
struct f2fs_io_info fio = {
.type = META,
@@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
for (; nrpages-- > 0; blkno++) {
block_t blk_addr;
+ if (!is_valid_blkaddr(sbi, blkno, type))
+ goto out;
+
switch (type) {
case META_NAT:
- /* get nat block addr */
- if (unlikely(blkno >= max_blks))
+ if (unlikely(blkno >=
+ NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
+ /* get nat block addr */
blk_addr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
- if (unlikely(blkno >= max_blks))
- goto out;
blk_addr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != blk_addr)
@@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
case META_SSA:
case META_CP:
case META_POR:
- if (unlikely(blkno >= max_blks))
- goto out;
- if (unlikely(blkno < SEG0_BLKADDR(sbi)))
- goto out;
blk_addr = blkno;
break;
default:
@@ -169,6 +166,20 @@ out:
return blkno - start;
}
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ bool readahead = false;
+
+ page = find_get_page(META_MAPPING(sbi), index);
+ if (!page || (page && !PageUptodate(page)))
+ readahead = true;
+ f2fs_put_page(page, 0);
+
+ if (readahead)
+ ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+}
+
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
@@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(sbi->por_doing))
goto redirty_out;
- if (wbc->for_reclaim)
+ if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
@@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page,
write_meta_page(sbi, page);
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
redirty_out:
@@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = {
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
+ struct inode_management *im = &sbi->im[type];
struct ino_entry *e;
retry:
- spin_lock(&sbi->ino_lock[type]);
+ if (radix_tree_preload(GFP_NOFS)) {
+ cond_resched();
+ goto retry;
+ }
+
+ spin_lock(&im->ino_lock);
- e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
if (!e) {
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
goto retry;
}
- if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
- spin_unlock(&sbi->ino_lock[type]);
+ if (radix_tree_insert(&im->ino_root, ino, e)) {
+ spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
+ radix_tree_preload_end();
goto retry;
}
memset(e, 0, sizeof(struct ino_entry));
e->ino = ino;
- list_add_tail(&e->list, &sbi->ino_list[type]);
+ list_add_tail(&e->list, &im->ino_list);
+ if (type != ORPHAN_INO)
+ im->ino_num++;
}
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
+ radix_tree_preload_end();
}
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
+ struct inode_management *im = &sbi->im[type];
struct ino_entry *e;
- spin_lock(&sbi->ino_lock[type]);
- e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
if (e) {
list_del(&e->list);
- radix_tree_delete(&sbi->ino_root[type], ino);
- if (type == ORPHAN_INO)
- sbi->n_orphans--;
- spin_unlock(&sbi->ino_lock[type]);
+ radix_tree_delete(&im->ino_root, ino);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
return;
}
- spin_unlock(&sbi->ino_lock[type]);
+ spin_unlock(&im->ino_lock);
}
void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
+ struct inode_management *im = &sbi->im[mode];
struct ino_entry *e;
- spin_lock(&sbi->ino_lock[mode]);
- e = radix_tree_lookup(&sbi->ino_root[mode], ino);
- spin_unlock(&sbi->ino_lock[mode]);
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ spin_unlock(&im->ino_lock);
return e ? true : false;
}
@@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi)
int i;
for (i = APPEND_INO; i <= UPDATE_INO; i++) {
- spin_lock(&sbi->ino_lock[i]);
- list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ struct inode_management *im = &sbi->im[i];
+
+ spin_lock(&im->ino_lock);
+ list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
list_del(&e->list);
- radix_tree_delete(&sbi->ino_root[i], e->ino);
+ radix_tree_delete(&im->ino_root, e->ino);
kmem_cache_free(ino_entry_slab, e);
+ im->ino_num--;
}
- spin_unlock(&sbi->ino_lock[i]);
+ spin_unlock(&im->ino_lock);
}
}
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
int err = 0;
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- if (unlikely(sbi->n_orphans >= sbi->max_orphans))
+ spin_lock(&im->ino_lock);
+ if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
- sbi->n_orphans++;
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ im->ino_num++;
+ spin_unlock(&im->ino_lock);
return err;
}
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- f2fs_bug_on(sbi, sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ spin_lock(&im->ino_lock);
+ f2fs_bug_on(sbi, im->ino_num == 0);
+ im->ino_num--;
+ spin_unlock(&im->ino_lock);
}
void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0;
unsigned short index;
- unsigned short orphan_blocks =
- (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ unsigned short orphan_blocks;
struct page *page = NULL;
struct ino_entry *orphan = NULL;
+ struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+ orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
- spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- head = &sbi->ino_list[ORPHAN_INO];
+ spin_lock(&im->ino_lock);
+ head = &im->ino_list;
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
f2fs_put_page(page, 1);
}
- spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+ spin_unlock(&im->ino_lock);
}
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
struct dir_inode_entry *entry;
struct inode *inode;
retry:
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
spin_lock(&sbi->dir_inode_lock);
head = &sbi->dir_inode_list;
@@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
struct page *cp_page;
@@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
- orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
orphan_blocks);
@@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
orphan_blocks);
}
- if (sbi->n_orphans)
+ if (orphan_num)
set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
@@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_put_page(cp_page, 1);
}
- if (sbi->n_orphans) {
+ if (orphan_num) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
}
@@ -975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
+ /* wait for previous submitted meta pages writeback */
+ wait_on_all_pages_writeback(sbi);
+
release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
int i;
for (i = 0; i < MAX_INO_ENTRY; i++) {
- INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
- spin_lock_init(&sbi->ino_lock[i]);
- INIT_LIST_HEAD(&sbi->ino_list[i]);
+ struct inode_management *im = &sbi->im[i];
+
+ INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
+ spin_lock_init(&im->ino_lock);
+ INIT_LIST_HEAD(&im->ino_list);
+ im->ino_num = 0;
}
/*
@@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
- sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8e58c4cc2cb9..7ec697b37f19 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err)
dec_page_count(sbi, F2FS_WRITEBACK);
}
- if (sbi->wait_io) {
- complete(sbi->wait_io);
- sbi->wait_io = NULL;
- }
-
if (!get_pages(sbi, F2FS_WRITEBACK) &&
!list_empty(&sbi->cp_wait.task_list))
wake_up(&sbi->cp_wait);
@@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
- int rw;
if (!io->bio)
return;
- rw = fio->rw;
-
- if (is_read_io(rw)) {
- trace_f2fs_submit_read_bio(io->sbi->sb, rw,
- fio->type, io->bio);
- submit_bio(rw, io->bio);
- } else {
- trace_f2fs_submit_write_bio(io->sbi->sb, rw,
- fio->type, io->bio);
- /*
- * META_FLUSH is only from the checkpoint procedure, and we
- * should wait this metadata bio for FS consistency.
- */
- if (fio->type == META_FLUSH) {
- DECLARE_COMPLETION_ONSTACK(wait);
- io->sbi->wait_io = &wait;
- submit_bio(rw, io->bio);
- wait_for_completion(&wait);
- } else {
- submit_bio(rw, io->bio);
- }
- }
+ if (is_read_io(fio->rw))
+ trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw,
+ fio->type, io->bio);
+ else
+ trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw,
+ fio->type, io->bio);
+ submit_bio(fio->rw, io->bio);
io->bio = NULL;
}
@@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
bool need_put = dn->inode_page ? false : true;
int err;
- /* if inode_page exists, index should be zero */
- f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
-
err = get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
@@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
static int f2fs_read_data_page(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
- int ret;
+ int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
/* If the file has inline data, try to read it directly */
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
- else
+ if (ret == -EAGAIN)
ret = mpage_readpage(page, get_data_block);
return ret;
@@ -859,10 +835,11 @@ write:
else if (has_not_enough_free_secs(sbi, 0))
goto redirty_out;
+ err = -EAGAIN;
f2fs_lock_op(sbi);
- if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
- err = f2fs_write_inline_data(inode, page, offset);
- else
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_write_inline_data(inode, page);
+ if (err == -EAGAIN)
err = do_write_data_page(page, &fio);
f2fs_unlock_op(sbi);
done:
@@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *page;
+ struct page *page, *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
@@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
-repeat:
- err = f2fs_convert_inline_data(inode, pos + len, NULL);
- if (err)
- goto fail;
+ /*
+ * We should check this at this moment to avoid deadlock on inode page
+ * and #0 page. The locking rule for inline_data conversion should be:
+ * lock_page(page #0) -> lock_page(inode_page)
+ */
+ if (index != 0) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ goto fail;
+ }
+repeat:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
err = -ENOMEM;
goto fail;
}
- /* to avoid latency during memory pressure */
- unlock_page(page);
-
*pagep = page;
- if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
- goto inline_data;
-
f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_reserve_block(&dn, index);
- f2fs_unlock_op(sbi);
- if (err) {
- f2fs_put_page(page, 0);
- goto fail;
- }
-inline_data:
- lock_page(page);
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
- goto repeat;
+
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto unlock_fail;
}
- f2fs_wait_on_page_writeback(page, DATA);
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode)) {
+ if (pos + len <= MAX_INLINE_DATA) {
+ read_inline_data(page, ipage);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+ sync_inode_page(&dn);
+ goto put_next;
+ }
+ err = f2fs_convert_inline_page(&dn, page);
+ if (err)
+ goto put_fail;
+ }
+ err = f2fs_reserve_block(&dn, index);
+ if (err)
+ goto put_fail;
+put_next:
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
+ f2fs_wait_on_page_writeback(page, DATA);
+
if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
unsigned start = pos & (PAGE_CACHE_SIZE - 1);
unsigned end = start + len;
@@ -1010,18 +1002,10 @@ inline_data:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
- if (f2fs_has_inline_data(inode)) {
- err = f2fs_read_inline_data(inode, page);
- if (err) {
- page_cache_release(page);
- goto fail;
- }
- } else {
- err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
- READ_SYNC);
- if (err)
- goto fail;
- }
+ err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+ READ_SYNC);
+ if (err)
+ goto fail;
lock_page(page);
if (unlikely(!PageUptodate(page))) {
@@ -1038,6 +1022,12 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+
+put_fail:
+ f2fs_put_dnode(&dn);
+unlock_fail:
+ f2fs_unlock_op(sbi);
+ f2fs_put_page(page, 1);
fail:
f2fs_write_failed(mapping, pos + len);
return err;
@@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file,
trace_f2fs_write_end(inode, pos, len, copied);
- if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
- register_inmem_page(inode, page);
- else
- set_page_dirty(page);
+ set_page_dirty(page);
if (pos + copied > i_size_read(inode)) {
i_size_write(inode, pos + copied);
@@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
int err;
- /* Let buffer I/O handle the inline data case. */
- if (f2fs_has_inline_data(inode))
- return 0;
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
if (check_direct_IO(inode, rw, iter, offset))
return 0;
@@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
return;
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ invalidate_inmem_page(inode, page);
+
if (PageDirty(page))
inode_dec_dirty_pages(inode);
ClearPagePrivate(page);
@@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page)
trace_f2fs_set_page_dirty(page, DATA);
SetPageUptodate(page);
+
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) {
+ register_inmem_page(inode, page);
+ return 1;
+ }
+
mark_inode_dirty(inode);
if (!PageDirty(page)) {
@@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
- if (f2fs_has_inline_data(inode))
- return 0;
-
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
return generic_block_bmap(mapping, block, get_data_block);
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0a91ab813a9e..91e8f699ab30 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+ si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
- si->inline_inode = sbi->inline_inode;
+ si->inline_inode = atomic_read(&sbi->inline_inode);
+ si->inline_dir = atomic_read(&sbi->inline_dir);
si->utilization = utilization(sbi);
si->free_segs = free_segments(sbi);
@@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
+ int i;
if (si->base_mem)
goto get_cache;
@@ -167,8 +170,9 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
+ for (i = 0; i <= UPDATE_INO; i++)
+ si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
}
static int stat_show(struct seq_file *s, void *v)
@@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v)
si->valid_count - si->valid_node_count);
seq_printf(s, " - Inline_data Inode: %u\n",
si->inline_inode);
+ seq_printf(s, " - Inline_dentry Inode: %u\n",
+ si->inline_dir);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
seq_puts(s, "\nBalancing F2FS Async:\n");
+ seq_printf(s, " - inmem: %4d\n",
+ si->inmem_pages);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
@@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
+ atomic_set(&sbi->inline_inode, 0);
+ atomic_set(&sbi->inline_dir, 0);
+
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b54f87149c09..b1a7d5737cd0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
-static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
[F2FS_FT_UNKNOWN] = DT_UNKNOWN,
[F2FS_FT_REG_FILE] = DT_REG,
[F2FS_FT_DIR] = DT_DIR,
@@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
-static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
@@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- struct qstr *name, int *max_slots,
- f2fs_hash_t namehash, struct page **res_page)
+ struct qstr *name, int *max_slots,
+ struct page **res_page)
+{
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ de = find_target_dentry(name, max_slots, &d);
+
+ if (de)
+ *res_page = dentry_page;
+ else
+ kunmap(dentry_page);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
+ struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
- struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- const void *dentry_bits = &dentry_blk->dentry_bitmap;
+ f2fs_hash_t namehash = f2fs_dentry_hash(name);
int max_len = 0;
- while (bit_pos < NR_DENTRY_IN_BLOCK) {
- if (!test_bit_le(bit_pos, dentry_bits)) {
+ if (max_slots)
+ *max_slots = 0;
+ while (bit_pos < d->max) {
+ if (!test_bit_le(bit_pos, d->bitmap)) {
if (bit_pos == 0)
max_len = 1;
- else if (!test_bit_le(bit_pos - 1, dentry_bits))
+ else if (!test_bit_le(bit_pos - 1, d->bitmap))
max_len++;
bit_pos++;
continue;
}
- de = &dentry_blk->dentry[bit_pos];
- if (early_match_name(name->len, namehash, de)) {
- if (!memcmp(dentry_blk->filename[bit_pos],
- name->name,
- name->len)) {
- *res_page = dentry_page;
- goto found;
- }
- }
- if (max_len > *max_slots) {
+ de = &d->dentry[bit_pos];
+ if (early_match_name(name->len, namehash, de) &&
+ !memcmp(d->filename[bit_pos], name->name, name->len))
+ goto found;
+
+ if (max_slots && *max_slots >= 0 && max_len > *max_slots) {
*max_slots = max_len;
max_len = 0;
}
- /*
- * For the most part, it should be a bug when name_len is zero.
- * We stop here for figuring out where the bugs has occurred.
- */
- f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
+ /* remain bug on condition */
+ if (unlikely(!de->name_len))
+ d->max = -1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
de = NULL;
- kunmap(dentry_page);
found:
- if (max_len > *max_slots)
+ if (max_slots && max_len > *max_slots)
*max_slots = max_len;
return de;
}
@@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
bool room = false;
- int max_slots = 0;
+ int max_slots;
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
@@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
continue;
}
- de = find_in_block(dentry_page, name, &max_slots,
- namehash, res_page);
+ de = find_in_block(dentry_page, name, &max_slots, res_page);
if (de)
break;
@@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ if (f2fs_has_inline_dentry(dir))
+ return find_in_inline_dir(dir, child, res_page);
+
if (npages == 0)
return NULL;
@@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
struct f2fs_dir_entry *de;
struct f2fs_dentry_block *dentry_blk;
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_parent_inline_dir(dir, p);
+
page = get_lock_data_page(dir, 0);
if (IS_ERR(page))
return NULL;
@@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
de = f2fs_find_entry(dir, qstr, &page);
if (de) {
res = le32_to_cpu(de->ino);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
}
@@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
+ enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, type);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
- kunmap(page);
+ if (!f2fs_has_inline_dentry(dir))
+ kunmap(page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);
@@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name)
return 0;
}
-static int make_empty_dir(struct inode *inode,
- struct inode *parent, struct page *page)
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+ struct f2fs_dentry_ptr *d)
{
- struct page *dentry_page;
- struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
- dentry_page = get_new_data_page(inode, page, 0, true);
- if (IS_ERR(dentry_page))
- return PTR_ERR(dentry_page);
-
-
- dentry_blk = kmap_atomic(dentry_page);
-
- de = &dentry_blk->dentry[0];
+ de = &d->dentry[0];
de->name_len = cpu_to_le16(1);
de->hash_code = 0;
de->ino = cpu_to_le32(inode->i_ino);
- memcpy(dentry_blk->filename[0], ".", 1);
+ memcpy(d->filename[0], ".", 1);
set_de_type(de, inode);
- de = &dentry_blk->dentry[1];
+ de = &d->dentry[1];
de->hash_code = 0;
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
- memcpy(dentry_blk->filename[1], "..", 2);
+ memcpy(d->filename[1], "..", 2);
set_de_type(de, inode);
- test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
- test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
+ test_and_set_bit_le(0, (void *)d->bitmap);
+ test_and_set_bit_le(1, (void *)d->bitmap);
+}
+
+static int make_empty_dir(struct inode *inode,
+ struct inode *parent, struct page *page)
+{
+ struct page *dentry_page;
+ struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ if (f2fs_has_inline_dentry(inode))
+ return make_empty_inline_dir(inode, parent, page);
+
+ dentry_page = get_new_data_page(inode, page, 0, true);
+ if (IS_ERR(dentry_page))
+ return PTR_ERR(dentry_page);
+
+ dentry_blk = kmap_atomic(dentry_page);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ do_make_empty_dir(inode, parent, &d);
+
kunmap_atomic(dentry_blk);
set_page_dirty(dentry_page);
@@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode,
return 0;
}
-static struct page *init_inode_metadata(struct inode *inode,
- struct inode *dir, const struct qstr *name)
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+ const struct qstr *name, struct page *dpage)
{
struct page *page;
int err;
@@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode,
goto error;
}
- err = f2fs_init_acl(inode, dir, page);
+ err = f2fs_init_acl(inode, dir, page, dpage);
if (err)
goto put_error;
@@ -395,7 +433,7 @@ error:
return ERR_PTR(err);
}
-static void update_parent_metadata(struct inode *dir, struct inode *inode,
+void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
@@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
-static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
+int room_for_filename(const void *bitmap, int slots, int max_slots)
{
int bit_start = 0;
int zero_start, zero_end;
next:
- zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- bit_start);
- if (zero_start >= NR_DENTRY_IN_BLOCK)
- return NR_DENTRY_IN_BLOCK;
+ zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start);
+ if (zero_start >= max_slots)
+ return max_slots;
- zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- zero_start);
+ zero_end = find_next_bit_le(bitmap, max_slots, zero_start);
if (zero_end - zero_start >= slots)
return zero_start;
bit_start = zero_end + 1;
- if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
- return NR_DENTRY_IN_BLOCK;
+ if (zero_end + 1 >= max_slots)
+ return max_slots;
goto next;
}
@@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
int err = 0;
int i;
+ if (f2fs_has_inline_dentry(dir)) {
+ err = f2fs_add_inline_entry(dir, name, inode);
+ if (!err || err != -EAGAIN)
+ return err;
+ else
+ err = 0;
+ }
+
dentry_hash = f2fs_dentry_hash(name);
level = 0;
current_depth = F2FS_I(dir)->i_current_depth;
@@ -491,7 +533,8 @@ start:
return PTR_ERR(dentry_page);
dentry_blk = kmap(dentry_page);
- bit_pos = room_for_filename(dentry_blk, slots);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_DENTRY_IN_BLOCK);
if (bit_pos < NR_DENTRY_IN_BLOCK)
goto add_dentry;
@@ -506,7 +549,7 @@ add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA);
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name);
+ page = init_inode_metadata(inode, dir, name, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, NULL);
+ page = init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
@@ -560,26 +603,57 @@ fail:
return err;
}
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+ down_write(&F2FS_I(inode)->i_sem);
+
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(dir);
+ if (page)
+ update_inode(dir, page);
+ else
+ update_inode_page(dir);
+ }
+ inode->i_ctime = CURRENT_TIME;
+
+ drop_nlink(inode);
+ if (S_ISDIR(inode->i_mode)) {
+ drop_nlink(inode);
+ i_size_write(inode, 0);
+ }
+ up_write(&F2FS_I(inode)->i_sem);
+ update_inode_page(inode);
+
+ if (inode->i_nlink == 0)
+ add_orphan_inode(sbi, inode->i_ino);
+ else
+ release_orphan_inode(sbi);
+}
+
/*
* It only removes the dentry from the dentry page, corresponding name
* entry in name page does not need to be touched during deletion.
*/
void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
- struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
- struct inode *dir = page->mapping->host;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_delete_inline_entry(dentry, page, dir, inode);
+
lock_page(page);
f2fs_wait_on_page_writeback(page, DATA);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
- test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
@@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
- if (inode) {
- struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-
- down_write(&F2FS_I(inode)->i_sem);
-
- if (S_ISDIR(inode->i_mode)) {
- drop_nlink(dir);
- update_inode_page(dir);
- }
- inode->i_ctime = CURRENT_TIME;
- drop_nlink(inode);
- if (S_ISDIR(inode->i_mode)) {
- drop_nlink(inode);
- i_size_write(inode, 0);
- }
- up_write(&F2FS_I(inode)->i_sem);
- update_inode_page(inode);
-
- if (inode->i_nlink == 0)
- add_orphan_inode(sbi, inode->i_ino);
- else
- release_orphan_inode(sbi);
- }
+ if (inode)
+ f2fs_drop_nlink(dir, inode, NULL);
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
@@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir)
unsigned long bidx;
struct page *dentry_page;
unsigned int bit_pos;
- struct f2fs_dentry_block *dentry_blk;
+ struct f2fs_dentry_block *dentry_blk;
unsigned long nblock = dir_blocks(dir);
+ if (f2fs_has_inline_dentry(dir))
+ return f2fs_empty_inline_dir(dir);
+
for (bidx = 0; bidx < nblock; bidx++) {
dentry_page = get_lock_data_page(dir, bidx);
if (IS_ERR(dentry_page)) {
@@ -640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir)
return false;
}
-
dentry_blk = kmap_atomic(dentry_page);
if (bidx == 0)
bit_pos = 2;
@@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
+bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+ unsigned int start_pos)
+{
+ unsigned char d_type = DT_UNKNOWN;
+ unsigned int bit_pos;
+ struct f2fs_dir_entry *de = NULL;
+
+ bit_pos = ((unsigned long)ctx->pos % d->max);
+
+ while (bit_pos < d->max) {
+ bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
+ if (bit_pos >= d->max)
+ break;
+
+ de = &d->dentry[bit_pos];
+ if (de->file_type < F2FS_FT_MAX)
+ d_type = f2fs_filetype_table[de->file_type];
+ else
+ d_type = DT_UNKNOWN;
+ if (!dir_emit(ctx, d->filename[bit_pos],
+ le16_to_cpu(de->name_len),
+ le32_to_cpu(de->ino), d_type))
+ return true;
+
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ ctx->pos = start_pos + bit_pos;
+ }
+ return false;
+}
+
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
unsigned long npages = dir_blocks(inode);
- unsigned int bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = NULL;
- struct f2fs_dir_entry *de = NULL;
struct page *dentry_page = NULL;
struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
- unsigned char d_type = DT_UNKNOWN;
+ struct f2fs_dentry_ptr d;
- bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
+ if (f2fs_has_inline_dentry(inode))
+ return f2fs_read_inline_dir(file, ctx);
/* readahead for multi pages of dir */
if (npages - n > 1 && !ra_has_index(ra, n))
@@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
continue;
dentry_blk = kmap(dentry_page);
- while (bit_pos < NR_DENTRY_IN_BLOCK) {
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK,
- bit_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- break;
-
- de = &dentry_blk->dentry[bit_pos];
- if (de->file_type < F2FS_FT_MAX)
- d_type = f2fs_filetype_table[de->file_type];
- else
- d_type = DT_UNKNOWN;
- if (!dir_emit(ctx,
- dentry_blk->filename[bit_pos],
- le16_to_cpu(de->name_len),
- le32_to_cpu(de->ino), d_type))
- goto stop;
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
- ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos;
- }
- bit_pos = 0;
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+
+ if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK))
+ goto stop;
+
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8171e80b2ee9..ec58bb2373fc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -46,8 +46,10 @@
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
-#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
-#define F2FS_MOUNT_NOBARRIER 0x00000400
+#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
+#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
+#define F2FS_MOUNT_NOBARRIER 0x00000800
+#define F2FS_MOUNT_FASTBOOT 0x00001000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
/*
* For INODE and NODE manager
*/
+/* for directory operations */
+struct f2fs_dentry_ptr {
+ const void *bitmap;
+ struct f2fs_dir_entry *dentry;
+ __u8 (*filename)[F2FS_SLOT_LEN];
+ int max;
+};
+
+static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d,
+ void *src, int type)
+{
+ if (type == 1) {
+ struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+ d->max = NR_DENTRY_IN_BLOCK;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ } else {
+ struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+ d->max = NR_INLINE_DENTRY;
+ d->bitmap = &t->dentry_bitmap;
+ d->dentry = t->dentry;
+ d->filename = t->filename;
+ }
+}
+
/*
* XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
* as its node offset to distinguish from index node blocks.
@@ -269,6 +297,7 @@ struct f2fs_inode_info {
struct extent_info ext; /* in-memory extent cache entry */
struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
+ struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
};
@@ -303,7 +332,7 @@ struct f2fs_nm_info {
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- rwlock_t nat_tree_lock; /* protect nat_tree_lock */
+ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
@@ -433,6 +462,7 @@ enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
+ F2FS_INMEM_PAGES,
NR_COUNT_TYPE,
};
@@ -470,6 +500,14 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
+/* for inner inode cache management */
+struct inode_management {
+ struct radix_tree_root ino_root; /* ino entry array */
+ spinlock_t ino_lock; /* for ino entry lock */
+ struct list_head ino_list; /* inode list head */
+ unsigned long ino_num; /* number of entries */
+};
+
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -488,7 +526,6 @@ struct f2fs_sb_info {
/* for bio operations */
struct f2fs_bio_info read_io; /* for read bios */
struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
- struct completion *wait_io; /* for completion bios */
/* for checkpoint */
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -500,13 +537,9 @@ struct f2fs_sb_info {
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
- /* for inode management */
- struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
- spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
- struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+ struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
/* for orphan inode, use 0'th array */
- unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
@@ -557,7 +590,8 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
- int inline_inode; /* # of inline_data inodes */
+ atomic_t inline_inode; /* # of inline_data inodes */
+ atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
unsigned int n_dirty_dirs; /* # of dir inodes */
#endif
@@ -988,6 +1022,13 @@ retry:
return entry;
}
+static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ while (radix_tree_insert(root, index, item))
+ cond_resched();
+}
+
#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
static inline bool IS_INODE(struct page *page)
@@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
return mask & *addr;
}
-static inline int f2fs_set_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
{
int mask;
int ret;
@@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr)
return ret;
}
-static inline int f2fs_clear_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
{
int mask;
int ret;
@@ -1044,6 +1085,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
return ret;
}
+static inline void f2fs_change_bit(unsigned int nr, char *addr)
+{
+ int mask;
+
+ addr += (nr >> 3);
+ mask = 1 << (7 - (nr & 0x07));
+ *addr ^= mask;
+}
+
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
@@ -1057,11 +1107,13 @@ enum {
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
FI_APPEND_WRITE, /* inode has appended data */
FI_UPDATE_WRITE, /* inode has in-place-update data */
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_DATA_EXIST, /* indicate data exists */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
set_inode_flag(fi, FI_ACL_MODE);
}
-static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
-{
- if (is_inode_flag_set(fi, FI_ACL_MODE)) {
- clear_inode_flag(fi, FI_ACL_MODE);
- return 1;
- }
- return 0;
-}
-
static inline void get_inline_info(struct f2fs_inode_info *fi,
struct f2fs_inode *ri)
{
@@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
set_inode_flag(fi, FI_INLINE_XATTR);
if (ri->i_inline & F2FS_INLINE_DATA)
set_inode_flag(fi, FI_INLINE_DATA);
+ if (ri->i_inline & F2FS_INLINE_DENTRY)
+ set_inode_flag(fi, FI_INLINE_DENTRY);
+ if (ri->i_inline & F2FS_DATA_EXIST)
+ set_inode_flag(fi, FI_DATA_EXIST);
}
static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
ri->i_inline |= F2FS_INLINE_XATTR;
if (is_inode_flag_set(fi, FI_INLINE_DATA))
ri->i_inline |= F2FS_INLINE_DATA;
+ if (is_inode_flag_set(fi, FI_INLINE_DENTRY))
+ ri->i_inline |= F2FS_INLINE_DENTRY;
+ if (is_inode_flag_set(fi, FI_DATA_EXIST))
+ ri->i_inline |= F2FS_DATA_EXIST;
}
static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
}
+static inline void f2fs_clear_inline_inode(struct inode *inode)
+{
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+}
+
+static inline int f2fs_exist_data(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
+}
+
static inline bool f2fs_is_atomic_file(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page)
return (void *)&(ri->i_addr[1]);
}
+static inline int f2fs_has_inline_dentry(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
+}
+
+static inline void *inline_dentry_addr(struct page *page)
+{
+ struct f2fs_inode *ri = F2FS_INODE(page);
+ return (void *)&(ri->i_addr[1]);
+}
+
+static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
+{
+ if (!f2fs_has_inline_dentry(dir))
+ kunmap(page);
+}
+
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child);
/*
* dir.c
*/
+extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
+void set_de_type(struct f2fs_dir_entry *, struct inode *);
+struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
+ struct f2fs_dentry_ptr *);
+bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
+ unsigned int);
+void do_make_empty_dir(struct inode *, struct inode *,
+ struct f2fs_dentry_ptr *);
+struct page *init_inode_metadata(struct inode *, struct inode *,
+ const struct qstr *, struct page *);
+void update_parent_metadata(struct inode *, struct inode *, unsigned int);
+int room_for_filename(const void *, int, int);
+void f2fs_drop_nlink(struct inode *, struct inode *, struct page *);
struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
struct page **);
struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
@@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
int update_dent_inode(struct inode *, const struct qstr *);
int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
-void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
+ struct inode *);
int f2fs_do_tmpfile(struct inode *, struct inode *);
int f2fs_make_empty(struct inode *, struct inode *);
bool f2fs_empty_dir(struct inode *);
@@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
+void invalidate_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
@@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1405,7 +1499,7 @@ struct f2fs_stat_info {
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int total_count, utilization;
- int bg_gc, inline_inode;
+ int bg_gc, inline_inode, inline_dir, inmem_pages;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
- ((F2FS_I_SB(inode))->inline_inode++); \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \
} while (0)
#define stat_dec_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
- ((F2FS_I_SB(inode))->inline_inode--); \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \
+ } while (0)
+#define stat_inc_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \
+ } while (0)
+#define stat_dec_inline_dir(inode) \
+ do { \
+ if (f2fs_has_inline_dentry(inode)) \
+ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \
} while (0)
-
#define stat_inc_seg_type(sbi, curseg) \
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
@@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_read_hit(sb)
#define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode)
+#define stat_inc_inline_dir(inode)
+#define stat_dec_inline_dir(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_seg_count(si, type)
@@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations;
* inline.c
*/
bool f2fs_may_inline(struct inode *);
+void read_inline_data(struct page *, struct page *);
int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
-int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
-void truncate_inline_data(struct inode *, u64);
+int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
+int f2fs_convert_inline_inode(struct inode *);
+int f2fs_write_inline_data(struct inode *, struct page *);
+void truncate_inline_data(struct page *, u64);
bool recover_inline_data(struct inode *, struct page *);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
+ struct page **);
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
+int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
+ struct inode *, struct inode *);
+bool f2fs_empty_inline_dir(struct inode *);
+int f2fs_read_inline_dir(struct file *, struct dir_context *);
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8e68bb64f835..3c27e0ecb3bc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
sb_start_pagefault(inode->i_sb);
- /* force to convert with normal data indices */
- err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
- if (err)
- goto out;
+ f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
/* block allocation */
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_reserve_block(&dn, page->index);
- f2fs_unlock_op(sbi);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out;
+ }
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
file_update_time(vma->vm_file);
lock_page(page);
@@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
+ else if (test_opt(sbi, FASTBOOT))
+ need_cp = true;
+ else if (sbi->active_logs == 2)
+ need_cp = true;
return need_cp;
}
+static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
+ bool ret = false;
+ /* But we need to avoid that there are some inode updates */
+ if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+ ret = true;
+ f2fs_put_page(i, 0);
+ return ret;
+}
+
+static void try_to_fix_pino(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ nid_t pino;
+
+ down_write(&fi->i_sem);
+ fi->xattr_ver = 0;
+ if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
+ get_parent_ino(inode, &pino)) {
+ fi->i_pino = pino;
+ file_got_pino(inode);
+ up_write(&fi->i_sem);
+
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ } else {
+ up_write(&fi->i_sem);
+ }
+}
+
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
}
+ /* if the inode is dirty, let's recover all the time */
+ if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
+ update_inode_page(inode);
+ goto go_write;
+ }
+
/*
* if there is no written data, don't waste time to write recovery info.
*/
if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
!exist_written_data(sbi, ino, APPEND_INO)) {
- struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
- /* But we need to avoid that there are some inode updates */
- if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) {
- f2fs_put_page(i, 0);
+ /* it may call write_inode just prior to fsync */
+ if (need_inode_page_update(sbi, ino))
goto go_write;
- }
- f2fs_put_page(i, 0);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
exist_written_data(sbi, ino, UPDATE_INO))
@@ -196,49 +233,36 @@ go_write:
up_read(&fi->i_sem);
if (need_cp) {
- nid_t pino;
-
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
- down_write(&fi->i_sem);
- F2FS_I(inode)->xattr_ver = 0;
- if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
- get_parent_ino(inode, &pino)) {
- F2FS_I(inode)->i_pino = pino;
- file_got_pino(inode);
- up_write(&fi->i_sem);
- mark_inode_dirty_sync(inode);
- ret = f2fs_write_inode(inode, NULL);
- if (ret)
- goto out;
- } else {
- up_write(&fi->i_sem);
- }
- } else {
+ /*
+ * We've secured consistency through sync_fs. Following pino
+ * will be used only for fsynced inodes after checkpoint.
+ */
+ try_to_fix_pino(inode);
+ goto out;
+ }
sync_nodes:
- sync_node_pages(sbi, ino, &wbc);
-
- if (need_inode_block_update(sbi, ino)) {
- mark_inode_dirty_sync(inode);
- ret = f2fs_write_inode(inode, NULL);
- if (ret)
- goto out;
- goto sync_nodes;
- }
+ sync_node_pages(sbi, ino, &wbc);
- ret = wait_on_node_pages_writeback(sbi, ino);
- if (ret)
- goto out;
+ if (need_inode_block_update(sbi, ino)) {
+ mark_inode_dirty_sync(inode);
+ f2fs_write_inode(inode, NULL);
+ goto sync_nodes;
+ }
+
+ ret = wait_on_node_pages_writeback(sbi, ino);
+ if (ret)
+ goto out;
- /* once recovery info is written, don't need to tack this */
- remove_dirty_inode(sbi, ino, APPEND_INO);
- clear_inode_flag(fi, FI_APPEND_WRITE);
+ /* once recovery info is written, don't need to tack this */
+ remove_dirty_inode(sbi, ino, APPEND_INO);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
- remove_dirty_inode(sbi, ino, UPDATE_INO);
- clear_inode_flag(fi, FI_UPDATE_WRITE);
- ret = f2fs_issue_flush(F2FS_I_SB(inode));
- }
+ remove_dirty_inode(sbi, ino, UPDATE_INO);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
+ ret = f2fs_issue_flush(sbi);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
@@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
/* handle inline data case */
- if (f2fs_has_inline_data(inode)) {
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
if (whence == SEEK_HOLE)
data_ofs = isize;
goto found;
@@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct inode *inode = file_inode(file);
+
+ /* we don't need to use inline_data strictly */
+ if (f2fs_has_inline_data(inode)) {
+ int err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
return 0;
@@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn)
truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
-static void truncate_partial_data_page(struct inode *inode, u64 from)
+static int truncate_partial_data_page(struct inode *inode, u64 from)
{
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
- if (f2fs_has_inline_data(inode))
- return truncate_inline_data(inode, from);
-
if (!offset)
- return;
+ return 0;
page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
if (IS_ERR(page))
- return;
+ return 0;
lock_page(page);
if (unlikely(!PageUptodate(page) ||
@@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
set_page_dirty(page);
-
out:
f2fs_put_page(page, 1);
+ return 0;
}
int truncate_blocks(struct inode *inode, u64 from, bool lock)
@@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
struct dnode_of_data dn;
pgoff_t free_from;
int count = 0, err = 0;
+ struct page *ipage;
trace_f2fs_truncate_blocks_enter(inode, from);
- if (f2fs_has_inline_data(inode))
- goto done;
-
free_from = (pgoff_t)
- ((from + blocksize - 1) >> (sbi->log_blocksize));
+ ((from + blocksize - 1) >> (sbi->log_blocksize));
if (lock)
f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ goto out;
+ }
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
if (err) {
if (err == -ENOENT)
goto free_next;
- if (lock)
- f2fs_unlock_op(sbi);
- trace_f2fs_truncate_blocks_exit(inode, err);
- return err;
+ goto out;
}
count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
+out:
if (lock)
f2fs_unlock_op(sbi);
-done:
+
/* lastly zero out the first data page */
- truncate_partial_data_page(inode, from);
+ if (!err)
+ err = truncate_partial_data_page(inode, from);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
@@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
+ /* we should check inline_data size */
+ if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
+ if (f2fs_convert_inline_inode(inode))
+ return;
+ }
+
if (!truncate_blocks(inode, i_size_read(inode), true)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
@@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (attr->ia_valid & ATTR_SIZE) {
- err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
- if (err)
- return err;
-
if (attr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
@@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (offset >= inode->i_size)
return ret;
- ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
- if (ret)
- return ret;
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (ret)
return ret;
- ret = f2fs_convert_inline_data(inode, offset + len, NULL);
- if (ret)
- return ret;
+ if (f2fs_has_inline_data(inode)) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
+ return f2fs_convert_inline_inode(inode);
+}
+
+static int f2fs_release_file(struct inode *inode, struct file *filp)
+{
+ /* some remained atomic pages should discarded */
+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ commit_inmem_pages(inode, true);
+ return 0;
}
static int f2fs_ioc_commit_atomic_write(struct file *filp)
@@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
return -EACCES;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- return 0;
+
+ return f2fs_convert_inline_inode(inode);
}
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
@@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = generic_file_open,
+ .release = f2fs_release_file,
.mmap = f2fs_file_mmap,
.fsync = f2fs_sync_file,
.fallocate = f2fs_fallocate,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2a8f4acdb86b..eec0933a4819 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
dev_t dev = sbi->sb->s_bdev->bd_dev;
int err = 0;
- if (!test_opt(sbi, BG_GC))
- goto out;
gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
if (!gc_th) {
err = -ENOMEM;
@@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = {
.get_victim = get_victim_by_default,
};
-static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
+static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
struct inode_entry *ie;
- list_for_each_entry(ie, ilist, list)
- if (ie->inode->i_ino == ino)
- return ie->inode;
+ ie = radix_tree_lookup(&gc_list->iroot, ino);
+ if (ie)
+ return ie->inode;
return NULL;
}
-static void add_gc_inode(struct inode *inode, struct list_head *ilist)
+static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
{
struct inode_entry *new_ie;
- if (inode == find_gc_inode(inode->i_ino, ilist)) {
+ if (inode == find_gc_inode(gc_list, inode->i_ino)) {
iput(inode);
return;
}
-
new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
new_ie->inode = inode;
- list_add_tail(&new_ie->list, ilist);
+retry:
+ if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) {
+ cond_resched();
+ goto retry;
+ }
+ list_add_tail(&new_ie->list, &gc_list->ilist);
}
-static void put_gc_inode(struct list_head *ilist)
+static void put_gc_inode(struct gc_inode_list *gc_list)
{
struct inode_entry *ie, *next_ie;
- list_for_each_entry_safe(ie, next_ie, ilist, list) {
+ list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
+ radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(&ie->list);
kmem_cache_free(winode_slab, ie);
@@ -553,7 +556,7 @@ out:
* the victim data block is ignored.
*/
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
- struct list_head *ilist, unsigned int segno, int gc_type)
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
struct f2fs_summary *entry;
@@ -605,27 +608,27 @@ next_step:
data_page = find_data_page(inode,
start_bidx + ofs_in_node, false);
- if (IS_ERR(data_page))
- goto next_iput;
+ if (IS_ERR(data_page)) {
+ iput(inode);
+ continue;
+ }
f2fs_put_page(data_page, 0);
- add_gc_inode(inode, ilist);
- } else {
- inode = find_gc_inode(dni.ino, ilist);
- if (inode) {
- start_bidx = start_bidx_of_node(nofs,
- F2FS_I(inode));
- data_page = get_lock_data_page(inode,
+ add_gc_inode(gc_list, inode);
+ continue;
+ }
+
+ /* phase 3 */
+ inode = find_gc_inode(gc_list, dni.ino);
+ if (inode) {
+ start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ data_page = get_lock_data_page(inode,
start_bidx + ofs_in_node);
- if (IS_ERR(data_page))
- continue;
- move_data_page(inode, data_page, gc_type);
- stat_inc_data_blk_count(sbi, 1);
- }
+ if (IS_ERR(data_page))
+ continue;
+ move_data_page(inode, data_page, gc_type);
+ stat_inc_data_blk_count(sbi, 1);
}
- continue;
-next_iput:
- iput(inode);
}
if (++phase < 4)
@@ -646,18 +649,20 @@ next_iput:
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
- int gc_type, int type)
+ int gc_type)
{
struct sit_info *sit_i = SIT_I(sbi);
int ret;
+
mutex_lock(&sit_i->sentry_lock);
- ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
+ ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
+ NO_CHECK_TYPE, LFS);
mutex_unlock(&sit_i->sentry_lock);
return ret;
}
static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
- struct list_head *ilist, int gc_type)
+ struct gc_inode_list *gc_list, int gc_type)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
@@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
gc_node_segment(sbi, sum->entries, segno, gc_type);
break;
case SUM_TYPE_DATA:
- gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
+ gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type);
break;
}
blk_finish_plug(&plug);
@@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
int f2fs_gc(struct f2fs_sb_info *sbi)
{
- struct list_head ilist;
unsigned int segno, i;
int gc_type = BG_GC;
int nfree = 0;
int ret = -1;
- struct cp_control cpc = {
- .reason = CP_SYNC,
+ struct cp_control cpc;
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(GFP_NOFS),
};
- INIT_LIST_HEAD(&ilist);
+ cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
+
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
@@ -709,7 +716,7 @@ gc_more:
write_checkpoint(sbi, &cpc);
}
- if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+ if (!__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
@@ -719,7 +726,7 @@ gc_more:
META_SSA);
for (i = 0; i < sbi->segs_per_sec; i++)
- do_garbage_collect(sbi, segno + i, &ilist, gc_type);
+ do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
@@ -735,7 +742,7 @@ gc_more:
stop:
mutex_unlock(&sbi->gc_mutex);
- put_gc_inode(&ilist);
+ put_gc_inode(&gc_list);
return ret;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 16f0b2b22999..6ff7ad38463e 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -40,6 +40,11 @@ struct inode_entry {
struct inode *inode;
};
+struct gc_inode_list {
+ struct list_head ilist;
+ struct radix_tree_root iroot;
+};
+
/*
* inline functions
*/
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 88036fd75797..f2d3c581e776 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -15,35 +15,44 @@
bool f2fs_may_inline(struct inode *inode)
{
- block_t nr_blocks;
- loff_t i_size;
-
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
return false;
if (f2fs_is_atomic_file(inode))
return false;
- nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
- if (inode->i_blocks > nr_blocks)
+ if (!S_ISREG(inode->i_mode))
return false;
- i_size = i_size_read(inode);
- if (i_size > MAX_INLINE_DATA)
+ if (i_size_read(inode) > MAX_INLINE_DATA)
return false;
return true;
}
-int f2fs_read_inline_data(struct inode *inode, struct page *page)
+void read_inline_data(struct page *page, struct page *ipage)
{
- struct page *ipage;
void *src_addr, *dst_addr;
- if (page->index) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
- goto out;
- }
+ if (PageUptodate(page))
+ return;
+
+ f2fs_bug_on(F2FS_P_SB(page), page->index);
+
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+ /* Copy the whole inline data block */
+ src_addr = inline_data_addr(ipage);
+ dst_addr = kmap_atomic(page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
+ SetPageUptodate(page);
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+ struct page *ipage;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
@@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
return PTR_ERR(ipage);
}
- zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+ if (!f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ return -EAGAIN;
+ }
- /* Copy the whole inline data block */
- src_addr = inline_data_addr(ipage);
- dst_addr = kmap(page);
- memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
- kunmap(page);
- f2fs_put_page(ipage, 1);
+ if (page->index)
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ else
+ read_inline_data(page, ipage);
-out:
SetPageUptodate(page);
+ f2fs_put_page(ipage, 1);
unlock_page(page);
-
return 0;
}
-static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
- int err = 0;
- struct page *ipage;
- struct dnode_of_data dn;
void *src_addr, *dst_addr;
block_t new_blk_addr;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
};
+ int dirty, err;
- f2fs_lock_op(sbi);
- ipage = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(ipage)) {
- err = PTR_ERR(ipage);
- goto out;
- }
+ f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
- /* someone else converted inline_data already */
- if (!f2fs_has_inline_data(inode))
- goto out;
+ if (!f2fs_exist_data(dn->inode))
+ goto clear_out;
- /*
- * i_addr[0] is not used for inline data,
- * so reserving new block will not destroy inline data
- */
- set_new_dnode(&dn, inode, ipage, NULL, 0);
- err = f2fs_reserve_block(&dn, 0);
+ err = f2fs_reserve_block(dn, 0);
if (err)
- goto out;
+ return err;
f2fs_wait_on_page_writeback(page, DATA);
+
+ if (PageUptodate(page))
+ goto no_update;
+
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
/* Copy the whole inline data block */
- src_addr = inline_data_addr(ipage);
- dst_addr = kmap(page);
+ src_addr = inline_data_addr(dn->inode_page);
+ dst_addr = kmap_atomic(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
- kunmap(page);
+ flush_dcache_page(page);
+ kunmap_atomic(dst_addr);
SetPageUptodate(page);
+no_update:
+ /* clear dirty state */
+ dirty = clear_page_dirty_for_io(page);
/* write data page to try to make data consistent */
set_page_writeback(page);
- write_data_page(page, &dn, &new_blk_addr, &fio);
- update_extent_cache(new_blk_addr, &dn);
+
+ write_data_page(page, dn, &new_blk_addr, &fio);
+ update_extent_cache(new_blk_addr, dn);
f2fs_wait_on_page_writeback(page, DATA);
+ if (dirty)
+ inode_dec_dirty_pages(dn->inode);
- /* clear inline data and flag after data writeback */
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- stat_dec_inline_inode(inode);
+ /* this converted inline_data should be recovered. */
+ set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
- sync_inode_page(&dn);
- f2fs_put_dnode(&dn);
-out:
- f2fs_unlock_op(sbi);
- return err;
+ /* clear inline data and flag after data writeback */
+ truncate_inline_data(dn->inode_page, 0);
+clear_out:
+ stat_dec_inline_inode(dn->inode);
+ f2fs_clear_inline_inode(dn->inode);
+ sync_inode_page(dn);
+ f2fs_put_dnode(dn);
+ return 0;
}
-int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
- struct page *page)
+int f2fs_convert_inline_inode(struct inode *inode)
{
- struct page *new_page = page;
- int err;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct dnode_of_data dn;
+ struct page *ipage, *page;
+ int err = 0;
- if (!f2fs_has_inline_data(inode))
- return 0;
- else if (to_size <= MAX_INLINE_DATA)
- return 0;
+ page = grab_cache_page(inode->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ f2fs_lock_op(sbi);
- if (!page || page->index != 0) {
- new_page = grab_cache_page(inode->i_mapping, 0);
- if (!new_page)
- return -ENOMEM;
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ err = PTR_ERR(ipage);
+ goto out;
}
- err = __f2fs_convert_inline_data(inode, new_page);
- if (!page || page->index != 0)
- f2fs_put_page(new_page, 1);
+ set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+ if (f2fs_has_inline_data(inode))
+ err = f2fs_convert_inline_page(&dn, page);
+
+ f2fs_put_dnode(&dn);
+out:
+ f2fs_unlock_op(sbi);
+
+ f2fs_put_page(page, 1);
return err;
}
-int f2fs_write_inline_data(struct inode *inode,
- struct page *page, unsigned size)
+int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
- struct page *ipage;
struct dnode_of_data dn;
int err;
@@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode,
err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
- ipage = dn.inode_page;
- f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- src_addr = kmap(page);
- dst_addr = inline_data_addr(ipage);
- memcpy(dst_addr, src_addr, size);
- kunmap(page);
-
- /* Release the first data block if it is allocated */
if (!f2fs_has_inline_data(inode)) {
- truncate_data_blocks_range(&dn, 1);
- set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- stat_inc_inline_inode(inode);
+ f2fs_put_dnode(&dn);
+ return -EAGAIN;
}
+ f2fs_bug_on(F2FS_I_SB(inode), page->index);
+
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+ src_addr = kmap_atomic(page);
+ dst_addr = inline_data_addr(dn.inode_page);
+ memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+ kunmap_atomic(src_addr);
+
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
-
return 0;
}
-void truncate_inline_data(struct inode *inode, u64 from)
+void truncate_inline_data(struct page *ipage, u64 from)
{
- struct page *ipage;
+ void *addr;
if (from >= MAX_INLINE_DATA)
return;
- ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
- if (IS_ERR(ipage))
- return;
-
f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- set_page_dirty(ipage);
- f2fs_put_page(ipage, 1);
+ addr = inline_data_addr(ipage);
+ memset(addr + from, 0, MAX_INLINE_DATA - from);
}
bool recover_inline_data(struct inode *inode, struct page *npage)
@@ -236,6 +241,10 @@ process_inline:
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
return true;
@@ -244,16 +253,279 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE);
- zero_user_segment(ipage, INLINE_DATA_OFFSET,
- INLINE_DATA_OFFSET + MAX_INLINE_DATA);
- clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ truncate_inline_data(ipage, 0);
+ f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
truncate_blocks(inode, 0, false);
- set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
goto process_inline;
}
return false;
}
+
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+ struct qstr *name, struct page **res_page)
+{
+ struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct f2fs_inline_dentry *inline_dentry;
+ struct f2fs_dir_entry *de;
+ struct f2fs_dentry_ptr d;
+ struct page *ipage;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+ de = find_target_dentry(name, NULL, &d);
+
+ unlock_page(ipage);
+ if (de)
+ *res_page = ipage;
+ else
+ f2fs_put_page(ipage, 0);
+
+ /*
+ * For the most part, it should be a bug when name_len is zero.
+ * We stop here for figuring out where the bugs has occurred.
+ */
+ f2fs_bug_on(sbi, d.max < 0);
+ return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
+ struct page **p)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ struct f2fs_dir_entry *de;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return NULL;
+
+ dentry_blk = inline_data_addr(ipage);
+ de = &dentry_blk->dentry[1];
+ *p = ipage;
+ unlock_page(ipage);
+ return de;
+}
+
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+ struct page *ipage)
+{
+ struct f2fs_inline_dentry *dentry_blk;
+ struct f2fs_dentry_ptr d;
+
+ dentry_blk = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ do_make_empty_dir(inode, parent, &d);
+
+ set_page_dirty(ipage);
+
+ /* update i_size to MAX_INLINE_DATA */
+ if (i_size_read(inode) < MAX_INLINE_DATA) {
+ i_size_write(inode, MAX_INLINE_DATA);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
+ }
+ return 0;
+}
+
+static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+ struct f2fs_inline_dentry *inline_dentry)
+{
+ struct page *page;
+ struct dnode_of_data dn;
+ struct f2fs_dentry_block *dentry_blk;
+ int err;
+
+ page = grab_cache_page(dir->i_mapping, 0);
+ if (!page)
+ return -ENOMEM;
+
+ set_new_dnode(&dn, dir, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, 0);
+ if (err)
+ goto out;
+
+ f2fs_wait_on_page_writeback(page, DATA);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+
+ dentry_blk = kmap_atomic(page);
+
+ /* copy data from inline dentry block to new dentry block */
+ memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
+ INLINE_DENTRY_BITMAP_SIZE);
+ memcpy(dentry_blk->dentry, inline_dentry->dentry,
+ sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
+ memcpy(dentry_blk->filename, inline_dentry->filename,
+ NR_INLINE_DENTRY * F2FS_SLOT_LEN);
+
+ kunmap_atomic(dentry_blk);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+
+ /* clear inline dir and flag after data writeback */
+ truncate_inline_data(ipage, 0);
+
+ stat_dec_inline_dir(dir);
+ clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
+
+ if (i_size_read(dir) < PAGE_CACHE_SIZE) {
+ i_size_write(dir, PAGE_CACHE_SIZE);
+ set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+
+ sync_inode_page(&dn);
+out:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
+ struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos;
+ f2fs_hash_t name_hash;
+ struct f2fs_dir_entry *de;
+ size_t namelen = name->len;
+ struct f2fs_inline_dentry *dentry_blk = NULL;
+ int slots = GET_DENTRY_SLOTS(namelen);
+ struct page *page;
+ int err = 0;
+ int i;
+
+ name_hash = f2fs_dentry_hash(name);
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+ slots, NR_INLINE_DENTRY);
+ if (bit_pos >= NR_INLINE_DENTRY) {
+ err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+ if (!err)
+ err = -EAGAIN;
+ goto out;
+ }
+
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ de = &dentry_blk->dentry[bit_pos];
+ de->hash_code = name_hash;
+ de->name_len = cpu_to_le16(namelen);
+ memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
+ de->ino = cpu_to_le32(inode->i_ino);
+ set_de_type(de, inode);
+ for (i = 0; i < slots; i++)
+ test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+ set_page_dirty(ipage);
+
+ /* we don't need to mark_inode_dirty now */
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+
+ update_parent_metadata(dir, inode, 0);
+fail:
+ up_write(&F2FS_I(inode)->i_sem);
+
+ if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+ update_inode(dir, ipage);
+ clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+ }
+out:
+ f2fs_put_page(ipage, 1);
+ return err;
+}
+
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+ struct inode *dir, struct inode *inode)
+{
+ struct f2fs_inline_dentry *inline_dentry;
+ int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ unsigned int bit_pos;
+ int i;
+
+ lock_page(page);
+ f2fs_wait_on_page_writeback(page, NODE);
+
+ inline_dentry = inline_data_addr(page);
+ bit_pos = dentry - inline_dentry->dentry;
+ for (i = 0; i < slots; i++)
+ test_and_clear_bit_le(bit_pos + i,
+ &inline_dentry->dentry_bitmap);
+
+ set_page_dirty(page);
+
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+ if (inode)
+ f2fs_drop_nlink(dir, inode, page);
+
+ f2fs_put_page(page, 1);
+}
+
+bool f2fs_empty_inline_dir(struct inode *dir)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct page *ipage;
+ unsigned int bit_pos = 2;
+ struct f2fs_inline_dentry *dentry_blk;
+
+ ipage = get_node_page(sbi, dir->i_ino);
+ if (IS_ERR(ipage))
+ return false;
+
+ dentry_blk = inline_data_addr(ipage);
+ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+ NR_INLINE_DENTRY,
+ bit_pos);
+
+ f2fs_put_page(ipage, 1);
+
+ if (bit_pos < NR_INLINE_DENTRY)
+ return false;
+
+ return true;
+}
+
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx)
+{
+ struct inode *inode = file_inode(file);
+ struct f2fs_inline_dentry *inline_dentry = NULL;
+ struct page *ipage = NULL;
+ struct f2fs_dentry_ptr d;
+
+ if (ctx->pos == NR_INLINE_DENTRY)
+ return 0;
+
+ ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
+
+ inline_dentry = inline_data_addr(ipage);
+
+ make_dentry_ptr(&d, (void *)inline_dentry, 2);
+
+ if (!f2fs_fill_dentries(ctx, &d, 0))
+ ctx->pos = NR_INLINE_DENTRY;
+
+ f2fs_put_page(ipage, 1);
+ return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0deead4505e7..196cc7843aaf 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
+static int __recover_inline_status(struct inode *inode, struct page *ipage)
+{
+ void *inline_data = inline_data_addr(ipage);
+ struct f2fs_inode *ri;
+ void *zbuf;
+
+ zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS);
+ if (!zbuf)
+ return -ENOMEM;
+
+ if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) {
+ kfree(zbuf);
+ return 0;
+ }
+ kfree(zbuf);
+
+ f2fs_wait_on_page_writeback(ipage, NODE);
+ set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
+ ri = F2FS_INODE(ipage);
+ set_raw_inline(F2FS_I(inode), ri);
+ set_page_dirty(ipage);
+ return 0;
+}
+
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
+ int err = 0;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode)
get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
+ /* check data exist */
+ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+ err = __recover_inline_status(inode, node_page);
+
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
f2fs_put_page(node_page, 1);
- return 0;
+
+ stat_inc_inline_inode(inode);
+ stat_inc_inline_dir(inode);
+
+ return err;
}
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@@ -156,7 +190,7 @@ make_now:
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
remove_inode_page(inode);
- stat_dec_inline_inode(inode);
f2fs_unlock_op(sbi);
sb_end_intwrite(inode->i_sb);
no_delete:
+ stat_dec_inline_dir(inode);
+ stat_dec_inline_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
@@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode)
f2fs_truncate(inode);
remove_inode_page(inode);
- stat_dec_inline_inode(inode);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
alloc_nid_failed(sbi, inode->i_ino);
f2fs_unlock_op(sbi);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0d2526e5aa11..547a2deeb1ac 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
goto out;
}
+
+ if (f2fs_may_inline(inode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode))
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+
trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;
@@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
alloc_nid_done(sbi, ino);
+ stat_inc_inline_inode(inode);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
handle_failed_inode(inode);
@@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
f2fs_unlock_op(sbi);
d_instantiate(dentry, inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (de) {
nid_t ino = le32_to_cpu(de->ino);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
-
- stat_inc_inline_inode(inode);
}
return d_splice_alias(inode, dentry);
@@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
err = acquire_orphan_inode(sbi);
if (err) {
f2fs_unlock_op(sbi);
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
goto fail;
}
- f2fs_delete_entry(de, page, inode);
+ f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
/* In order to evict this inode, we set it dirty */
mark_inode_dirty(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
fail:
trace_f2fs_unlink_exit(inode, err);
return err;
@@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return err;
out:
handle_failed_inode(inode);
@@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_fail;
f2fs_unlock_op(sbi);
+ stat_inc_inline_dir(inode);
alloc_nid_done(sbi, inode->i_ino);
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out_fail:
@@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
f2fs_unlock_op(sbi);
alloc_nid_done(sbi, inode->i_ino);
+
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+
+ if (IS_DIRSYNC(dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out:
handle_failed_inode(inode);
@@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
- f2fs_delete_entry(old_entry, old_page, NULL);
+ f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
if (old_dir_entry) {
if (old_dir != new_dir) {
@@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir_page, new_dir);
update_inode_page(old_inode);
} else {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
drop_nlink(old_dir);
@@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
f2fs_unlock_op(sbi);
+
+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
put_out_dir:
f2fs_unlock_op(sbi);
- kunmap(new_page);
+ f2fs_dentry_kunmap(new_dir, new_page);
f2fs_put_page(new_page, 0);
out_dir:
if (old_dir_entry) {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
out_old:
- kunmap(old_page);
+ f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
@@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(new_dir);
f2fs_unlock_op(sbi);
+
+ if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+ f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
/* Still we may fail to recover name info of f2fs_inode here */
@@ -596,19 +626,19 @@ out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
if (new_dir_entry) {
- kunmap(new_dir_page);
+ f2fs_dentry_kunmap(new_inode, new_dir_page);
f2fs_put_page(new_dir_page, 0);
}
out_old_dir:
if (old_dir_entry) {
- kunmap(old_dir_page);
+ f2fs_dentry_kunmap(old_inode, old_dir_page);
f2fs_put_page(old_dir_page, 0);
}
out_new:
- kunmap(new_page);
+ f2fs_dentry_kunmap(new_dir, new_page);
f2fs_put_page(new_page, 0);
out_old:
- kunmap(old_page);
+ f2fs_dentry_kunmap(old_dir, old_page);
f2fs_put_page(old_page, 0);
out:
return err;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 44b8afef43d9..f83326ca32ef 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
+ unsigned long avail_ram;
unsigned long mem_size = 0;
bool res = false;
si_meminfo(&val);
- /* give 25%, 25%, 50% memory for each components respectively */
+
+ /* only uses low memory */
+ avail_ram = val.totalram - val.totalhigh;
+
+ /* give 25%, 25%, 50%, 50% memory for each components respectively */
if (type == FREE_NIDS) {
- mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INO_ENTRIES) {
+ int i;
+
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ for (i = 0; i <= UPDATE_INO; i++)
+ mem_size += (sbi->im[i].ino_num *
+ sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
}
return res;
}
@@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
if (get_nat_flag(ne, IS_DIRTY))
return;
-retry:
+
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
@@ -140,11 +156,7 @@ retry:
INIT_LIST_HEAD(&head->set_list);
head->set = set;
head->entry_cnt = 0;
-
- if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
- cond_resched();
- goto retry;
- }
+ f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
}
list_move_tail(&ne->list, &head->entry_list);
nm_i->dirty_nat_cnt++;
@@ -155,7 +167,7 @@ retry:
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry *ne)
{
- nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
+ nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
struct nat_entry_set *head;
head = radix_tree_lookup(&nm_i->nat_set_root, set);
@@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool is_cp = true;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool fsynced = false;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
fsynced = true;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return fsynced;
}
@@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool need_update = true;
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
- new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
- if (!new)
- return NULL;
- if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
- kmem_cache_free(nat_entry_slab, new);
- return NULL;
- }
+ new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
+ f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
nat_reset_flag(new);
@@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
struct f2fs_nat_entry *ne)
{
struct nat_entry *e;
-retry:
- write_lock(&nm_i->nat_tree_lock);
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
- if (!e) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
- }
node_info_from_raw_nat(&e->ni, ne);
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
-retry:
- write_lock(&nm_i->nat_tree_lock);
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
- if (!e) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
- }
e->ni = *ni;
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -310,7 +309,7 @@ retry:
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (available_free_memory(sbi, NAT_ENTRIES))
return 0;
- write_lock(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
ne = list_first_entry(&nm_i->nat_entries,
@@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
}
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
return nr_shrink;
}
@@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->nid = nid;
/* Check nat cache */
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
}
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
if (e)
return;
@@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}
- if (wbc->for_reclaim)
- goto redirty_out;
-
- down_read(&sbi->node_write);
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
unlock_page(page);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+
return 0;
redirty_out:
@@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (build) {
/* do not add allocated nids */
- read_lock(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (ne &&
(!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
- read_unlock(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
if (allocated)
return 0;
}
@@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
i->nid = nid;
i->state = NID_NEW;
+ if (radix_tree_preload(GFP_NOFS)) {
+ kmem_cache_free(free_nid_slab, i);
+ return 0;
+ }
+
spin_lock(&nm_i->free_nid_list_lock);
if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
kmem_cache_free(free_nid_slab, i);
return 0;
}
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
spin_unlock(&nm_i->free_nid_list_lock);
+ radix_tree_preload_end();
return 1;
}
@@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
raw_ne = nat_in_journal(sum, i);
-retry:
- write_lock(&nm_i->nat_tree_lock);
- ne = __lookup_nat_cache(nm_i, nid);
- if (ne)
- goto found;
- ne = grab_nat_entry(nm_i, nid);
+ down_write(&nm_i->nat_tree_lock);
+ ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- write_unlock(&nm_i->nat_tree_lock);
- goto retry;
+ ne = grab_nat_entry(nm_i, nid);
+ node_info_from_raw_nat(&ne->ni, &raw_ne);
}
- node_info_from_raw_nat(&ne->ni, &raw_ne);
-found:
__set_nat_cache_dirty(nm_i, ne);
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
@@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
raw_nat_from_node_info(raw_ne, &ne->ni);
- write_lock(&NM_I(sbi)->nat_tree_lock);
+ down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- write_unlock(&NM_I(sbi)->nat_tree_lock);
+ up_write(&NM_I(sbi)->nat_tree_lock);
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
@@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
else
f2fs_put_page(page, 1);
- if (!set->entry_cnt) {
- radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
- kmem_cache_free(nat_entry_set_slab, set);
- }
+ f2fs_bug_on(sbi, set->entry_cnt);
+
+ radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+ kmem_cache_free(nat_entry_set_slab, set);
}
/*
@@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
nid_t set_idx = 0;
LIST_HEAD(sets);
+ if (!nm_i->dirty_nat_cnt)
+ return;
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
@@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
- if (!nm_i->dirty_nat_cnt)
- return;
-
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, NATVEC_SIZE, setvec))) {
unsigned idx;
@@ -1973,13 +1978,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
- INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
- INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
+ INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
+ INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
- rwlock_init(&nm_i->nat_tree_lock);
+ init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
- write_lock(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
__del_from_nat_cache(nm_i, natvec[idx]);
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
- write_unlock(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
sbi->nm_info = NULL;
@@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void)
free_nid_slab = f2fs_kmem_cache_create("free_nid",
sizeof(struct free_nid));
if (!free_nid_slab)
- goto destory_nat_entry;
+ goto destroy_nat_entry;
nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
- goto destory_free_nid;
+ goto destroy_free_nid;
return 0;
-destory_free_nid:
+destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
-destory_nat_entry:
+destroy_nat_entry:
kmem_cache_destroy(nat_entry_slab);
fail:
return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 8d5e6e0dd840..d10b6448a671 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
- DIRTY_DENTS /* indicates dirty dentry pages */
+ DIRTY_DENTS, /* indicates dirty dentry pages */
+ INO_ENTRIES, /* indicates inode entries */
};
struct nat_entry_set {
@@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
{
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
- if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
- f2fs_clear_bit(block_off, nm_i->nat_bitmap);
- else
- f2fs_set_bit(block_off, nm_i->nat_bitmap);
+ f2fs_change_bit(block_off, nm_i->nat_bitmap);
}
static inline void fill_node_footer(struct page *page, nid_t nid,
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index ebd013225788..9160a37e1c7a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -111,7 +111,7 @@ retry:
iput(einode);
goto out_unmap_put;
}
- f2fs_delete_entry(de, page, einode);
+ f2fs_delete_entry(de, page, dir, einode);
iput(einode);
goto retry;
}
@@ -129,7 +129,7 @@ retry:
goto out;
out_unmap_put:
- kunmap(page);
+ f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
out_err:
iput(dir);
@@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ ra_meta_pages(sbi, blkaddr, 1, META_POR);
+
while (1) {
struct fsync_inode_entry *entry;
if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
return 0;
- page = get_meta_page_ra(sbi, blkaddr);
+ page = get_meta_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page))
break;
@@ -227,6 +229,8 @@ next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
+
+ ra_meta_pages_cond(sbi, blkaddr);
}
f2fs_put_page(page, 1);
return err;
@@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
break;
- page = get_meta_page_ra(sbi, blkaddr);
+ ra_meta_pages_cond(sbi, blkaddr);
+
+ page = get_meta_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) {
f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 923cb76fdc46..42607a679923 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
+ int err;
+
+ SetPagePrivate(page);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
/* add atomic page indices to the list */
new->page = page;
INIT_LIST_HEAD(&new->list);
-
+retry:
/* increase reference count with clean state */
mutex_lock(&fi->inmem_lock);
+ err = radix_tree_insert(&fi->inmem_root, page->index, new);
+ if (err == -EEXIST) {
+ mutex_unlock(&fi->inmem_lock);
+ kmem_cache_free(inmem_entry_slab, new);
+ return;
+ } else if (err) {
+ mutex_unlock(&fi->inmem_lock);
+ goto retry;
+ }
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
+ inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ mutex_unlock(&fi->inmem_lock);
+}
+
+void invalidate_inmem_page(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct inmem_pages *cur;
+
+ mutex_lock(&fi->inmem_lock);
+ cur = radix_tree_lookup(&fi->inmem_root, page->index);
+ if (cur) {
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
+ f2fs_put_page(cur->page, 0);
+ list_del(&cur->list);
+ kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ }
mutex_unlock(&fi->inmem_lock);
}
@@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort)
.rw = WRITE_SYNC,
};
- f2fs_balance_fs(sbi);
+ /*
+ * The abort is true only when f2fs_evict_inode is called.
+ * Basically, the f2fs_evict_inode doesn't produce any data writes, so
+ * that we don't need to call f2fs_balance_fs.
+ * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
+ * inode becomes free by iget_locked in f2fs_iget.
+ */
+ if (!abort)
+ f2fs_balance_fs(sbi);
+
f2fs_lock_op(sbi);
mutex_lock(&fi->inmem_lock);
@@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort)
do_write_data_page(cur->page, &fio);
submit_bio = true;
}
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
f2fs_put_page(cur->page, 1);
list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
}
if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE);
@@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
- excess_prefree_segs(sbi))
+ excess_prefree_segs(sbi) ||
+ available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true);
}
@@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static void __add_discard_entry(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc, unsigned int start, unsigned int end)
{
struct list_head *head = &SM_I(sbi)->discard_list;
- struct discard_entry *new;
+ struct discard_entry *new, *last;
+
+ if (!list_empty(head)) {
+ last = list_last_entry(head, struct discard_entry, list);
+ if (START_BLOCK(sbi, cpc->trim_start) + start ==
+ last->blkaddr + last->len) {
+ last->len += end - start;
+ goto done;
+ }
+ }
+
+ new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+ INIT_LIST_HEAD(&new->list);
+ new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
+ new->len = end - start;
+ list_add_tail(&new->list, head);
+done:
+ SM_I(sbi)->nr_discards += end - start;
+ cpc->trimmed += end - start;
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
int max_blocks = sbi->blocks_per_seg;
struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
@@ -473,13 +538,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
mutex_unlock(&dirty_i->seglist_lock);
- new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
- INIT_LIST_HEAD(&new->list);
- new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
- new->len = sbi->blocks_per_seg;
- list_add_tail(&new->list, head);
- SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
- cpc->trimmed += sbi->blocks_per_seg;
+ __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
return;
}
@@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
- dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+ dmap[i] = ~(cur_map[i] | ckpt_map[i]);
while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
@@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (end - start < cpc->trim_minlen)
continue;
- new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
- INIT_LIST_HEAD(&new->list);
- new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
- new->len = end - start;
- cpc->trimmed += end - start;
-
- list_add_tail(&new->list, head);
- SM_I(sbi)->nr_discards += end - start;
+ __add_discard_entry(sbi, cpc, start, end);
}
}
@@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
/* Update valid block bitmap */
if (del > 0) {
- if (f2fs_set_bit(offset, se->cur_valid_map))
+ if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
} else {
- if (!f2fs_clear_bit(offset, se->cur_valid_map))
+ if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
f2fs_bug_on(sbi, 1);
}
if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
range->len < sbi->blocksize)
return -EINVAL;
+ cpc.trimmed = 0;
if (end <= MAIN_BLKADDR(sbi))
goto out;
@@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
cpc.trim_start = start_segno;
cpc.trim_end = end_segno;
cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
- cpc.trimmed = 0;
/* do checkpoint to issue discard commands safely */
+ mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
+ mutex_unlock(&sbi->gc_mutex);
out:
range->len = cpc.trimmed << sbi->log_blocksize;
return 0;
@@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(page) && !is_cold_node(page))
- return CURSEG_HOT_NODE;
+ if (IS_DNODE(page) && is_cold_node(page))
+ return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
}
@@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- struct sit_info *sit_i = SIT_I(sbi);
- unsigned int offset = SIT_BLOCK_OFFSET(segno);
- block_t blk_addr = sit_i->sit_base_addr + offset;
-
- check_seg_range(sbi, segno);
-
- /* calculate sit block address */
- if (f2fs_test_bit(offset, sit_i->sit_bitmap))
- blk_addr += sit_i->sit_blocks;
-
- return get_meta_page(sbi, blk_addr);
+ return get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* #2, flush sit entries to sit page.
*/
list_for_each_entry_safe(ses, tmp, head, set_list) {
- struct page *page;
+ struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start_segno = ses->start_segno;
unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
@@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void)
goto fail;
sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
- sizeof(struct nat_entry_set));
+ sizeof(struct sit_entry_set));
if (!sit_entry_set_slab)
goto destory_discard_entry;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2495bec1c621..7f327c0ba4e3 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
{
unsigned int block_off = SIT_BLOCK_OFFSET(start);
- if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
- f2fs_clear_bit(block_off, sit_i->sit_bitmap);
- else
- f2fs_set_bit(block_off, sit_i->sit_bitmap);
+ f2fs_change_bit(block_off, sit_i->sit_bitmap);
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return 0;
+
if (type == DATA)
return sbi->blocks_per_seg;
else if (type == NODE)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41d6f700f4ee..f71421d70475 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,8 +51,10 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_inline_data,
+ Opt_inline_dentry,
Opt_flush_merge,
Opt_nobarrier,
+ Opt_fastboot,
Opt_err,
};
@@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
+ {Opt_inline_dentry, "inline_dentry"},
{Opt_flush_merge, "flush_merge"},
{Opt_nobarrier, "nobarrier"},
+ {Opt_fastboot, "fastboot"},
{Opt_err, NULL},
};
@@ -340,12 +344,18 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_data:
set_opt(sbi, INLINE_DATA);
break;
+ case Opt_inline_dentry:
+ set_opt(sbi, INLINE_DENTRY);
+ break;
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
case Opt_nobarrier:
set_opt(sbi, NOBARRIER);
break;
+ case Opt_fastboot:
+ set_opt(sbi, FASTBOOT);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_advise = 0;
rwlock_init(&fi->ext.ext_lock);
init_rwsem(&fi->i_sem);
+ INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
@@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
trace_f2fs_sync_fs(sb, sync);
if (sync) {
- struct cp_control cpc = {
- .reason = CP_SYNC,
- };
+ struct cp_control cpc;
+
+ cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
@@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
+ if (test_opt(sbi, INLINE_DENTRY))
+ seq_puts(seq, ",inline_dentry");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
if (test_opt(sbi, NOBARRIER))
seq_puts(seq, ",nobarrier");
+ if (test_opt(sbi, FASTBOOT))
+ seq_puts(seq, ",fastboot");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
f2fs_sync_fs(sb, 1);
need_restart_gc = true;
}
- } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
+ } else if (!sbi->gc_thread) {
err = start_gc_thread(sbi);
if (err)
goto restore_opts;
@@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
- } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
+ } else if (!SM_I(sbi)->cmd_control_info) {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -922,7 +937,7 @@ retry:
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
- struct f2fs_super_block *raw_super;
+ struct f2fs_super_block *raw_super = NULL;
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
@@ -1123,7 +1138,7 @@ try_onemore:
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (!f2fs_readonly(sb)) {
+ if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = start_gc_thread(sbi);
if (err)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index deca8728117b..5072bf9ae0ef 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
}
if (strcmp(name, "") == 0)
return -EINVAL;
- return f2fs_getxattr(dentry->d_inode, type, name, buffer, size);
+ return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
}
static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
- void *buffer, size_t buffer_size)
+ void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
@@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
- base_addr = read_all_xattrs(inode, NULL);
+ base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
return -ENOMEM;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 34ab7dbcf5e3..969d792ca362 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[];
extern int f2fs_setxattr(struct inode *, int, const char *,
const void *, size_t, struct page *, int);
-extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
+extern int f2fs_getxattr(struct inode *, int, const char *, void *,
+ size_t, struct page *);
extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#else
@@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index,
return -EOPNOTSUPP;
}
static inline int f2fs_getxattr(struct inode *inode, int index,
- const char *name, void *buffer, size_t buffer_size)
+ const char *name, void *buffer,
+ size_t buffer_size, struct page *dpage)
{
return -EOPNOTSUPP;
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6df8d3d885e5..b8b92c2f9683 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
}
alias = d_find_alias(inode);
- if (alias && !vfat_d_anon_disconn(alias)) {
+ /*
+ * Checking "alias->d_parent == dentry->d_parent" to make sure
+ * FS is not corrupted (especially double linked dir).
+ */
+ if (alias && alias->d_parent == dentry->d_parent &&
+ !vfat_d_anon_disconn(alias)) {
/*
* This inode has non anonymous-DCACHE_DISCONNECTED
* dentry. This means, the user did ->lookup() by an
@@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
- dentry->d_time = dentry->d_parent->d_inode->i_version;
- dentry = d_splice_alias(inode, dentry);
- if (dentry)
- dentry->d_time = dentry->d_parent->d_inode->i_version;
- return dentry;
-
+ if (!inode)
+ dentry->d_time = dir->i_version;
+ return d_splice_alias(inode, dentry);
error:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
return ERR_PTR(err);
@@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
- dentry->d_time = dentry->d_parent->d_inode->i_version;
d_instantiate(dentry, inode);
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
+ dentry->d_time = dir->i_version;
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry)
clear_nlink(inode);
inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
fat_detach(inode);
+ dentry->d_time = dir->i_version;
out:
mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
- dentry->d_time = dentry->d_parent->d_inode->i_version;
d_instantiate(dentry, inode);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5d4261ff5d23..c5a34f09e228 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -365,23 +365,17 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
ret = gfs2_dir_read_data(ip, hc, hsize);
if (ret < 0) {
- if (is_vmalloc_addr(hc))
- vfree(hc);
- else
- kfree(hc);
+ kvfree(hc);
return ERR_PTR(ret);
}
spin_lock(&inode->i_lock);
- if (ip->i_hash_cache) {
- if (is_vmalloc_addr(hc))
- vfree(hc);
- else
- kfree(hc);
- } else {
+ if (likely(!ip->i_hash_cache)) {
ip->i_hash_cache = hc;
+ hc = NULL;
}
spin_unlock(&inode->i_lock);
+ kvfree(hc);
return ip->i_hash_cache;
}
@@ -396,10 +390,7 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
{
__be64 *hc = ip->i_hash_cache;
ip->i_hash_cache = NULL;
- if (is_vmalloc_addr(hc))
- vfree(hc);
- else
- kfree(hc);
+ kvfree(hc);
}
static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1168,10 +1159,7 @@ fail:
gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
out_kfree:
- if (is_vmalloc_addr(hc2))
- vfree(hc2);
- else
- kfree(hc2);
+ kvfree(hc2);
return error;
}
@@ -1302,14 +1290,6 @@ static void *gfs2_alloc_sort_buffer(unsigned size)
return ptr;
}
-static void gfs2_free_sort_buffer(void *ptr)
-{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
-}
-
static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
int *copied, unsigned *depth,
u64 leaf_no)
@@ -1393,7 +1373,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
out_free:
for(i = 0; i < leaf; i++)
brelse(larr[i]);
- gfs2_free_sort_buffer(larr);
+ kvfree(larr);
out:
return error;
}
@@ -2004,10 +1984,7 @@ out_rlist:
gfs2_rlist_free(&rlist);
gfs2_quota_unhold(dip);
out:
- if (is_vmalloc_addr(ht))
- vfree(ht);
- else
- kfree(ht);
+ kvfree(ht);
return error;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 80dd44dca028..6e600abf694a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -337,7 +337,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
int hint = min_t(size_t, INT_MAX, blks);
- atomic_set(&ip->i_res->rs_sizehint, hint);
+ if (hint > atomic_read(&ip->i_res->rs_sizehint))
+ atomic_set(&ip->i_res->rs_sizehint, hint);
}
/**
@@ -728,7 +729,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *dibh;
int error;
- loff_t size = len;
unsigned int nr_blks;
sector_t lblock = offset >> inode->i_blkbits;
@@ -762,11 +762,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
goto out;
}
}
- if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
- i_size_write(inode, offset + size);
-
- mark_inode_dirty(inode);
-
out:
brelse(dibh);
return error;
@@ -796,8 +791,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
}
}
-static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
- loff_t len)
+static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -811,14 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
loff_t max_chunk_size = UINT_MAX & bsize_mask;
- struct gfs2_holder gh;
next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode & ~FALLOC_FL_KEEP_SIZE)
- return -EOPNOTSUPP;
-
offset &= bsize_mask;
len = next - offset;
@@ -829,17 +818,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
if (bytes == 0)
bytes = sdp->sd_sb.sb_bsize;
- error = gfs2_rs_alloc(ip);
- if (error)
- return error;
-
- mutex_lock(&inode->i_mutex);
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- error = gfs2_glock_nq(&gh);
- if (unlikely(error))
- goto out_uninit;
-
gfs2_size_hint(file, offset, len);
while (len > 0) {
@@ -852,8 +830,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
}
error = gfs2_quota_lock_check(ip);
if (error)
- goto out_unlock;
-
+ return error;
retry:
gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -895,20 +872,64 @@ retry:
gfs2_quota_unlock(ip);
}
- if (error == 0)
- error = generic_write_sync(file, pos, count);
- goto out_unlock;
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
+ i_size_write(inode, pos + count);
+ /* Marks the inode as dirty */
+ file_update_time(file);
+ }
+
+ return generic_write_sync(file, pos, count);
out_trans_fail:
gfs2_inplace_release(ip);
out_qunlock:
gfs2_quota_unlock(ip);
+ return error;
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(file);
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
+ if (ret)
+ goto out_uninit;
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ (offset + len) > inode->i_size) {
+ ret = inode_newsize_ok(inode, offset + len);
+ if (ret)
+ goto out_unlock;
+ }
+
+ ret = get_write_access(inode);
+ if (ret)
+ goto out_unlock;
+
+ ret = gfs2_rs_alloc(ip);
+ if (ret)
+ goto out_putw;
+
+ ret = __gfs2_fallocate(file, mode, offset, len);
+ if (ret)
+ gfs2_rs_deltree(ip->i_res);
+out_putw:
+ put_write_access(inode);
out_unlock:
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
mutex_unlock(&inode->i_mutex);
- return error;
+ return ret;
}
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8f0c19d1d943..a23524aa3eac 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -836,8 +836,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
gh->gh_flags = flags;
gh->gh_iflags = 0;
gh->gh_ip = _RET_IP_;
- if (gh->gh_owner_pid)
- put_pid(gh->gh_owner_pid);
+ put_pid(gh->gh_owner_pid);
gh->gh_owner_pid = get_pid(task_pid(current));
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1cc0bba6313f..fe91951c3361 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,8 @@
#include "trans.h"
#include "dir.h"
+struct workqueue_struct *gfs2_freeze_wq;
+
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
@@ -94,11 +96,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
* on the stack */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = _RET_IP_;
- sb_start_intwrite(sdp->sd_vfs);
- if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) {
- sb_end_intwrite(sdp->sd_vfs);
+ if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
return;
- }
WARN_ON_ONCE(current->journal_info);
current->journal_info = &tr;
@@ -469,20 +468,19 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
static void freeze_go_sync(struct gfs2_glock *gl)
{
+ int error = 0;
struct gfs2_sbd *sdp = gl->gl_sbd;
- DEFINE_WAIT(wait);
if (gl->gl_state == LM_ST_SHARED &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
- atomic_set(&sdp->sd_log_freeze, 1);
- wake_up(&sdp->sd_logd_waitq);
- do {
- prepare_to_wait(&sdp->sd_log_frozen_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (atomic_read(&sdp->sd_log_freeze))
- io_schedule();
- } while(atomic_read(&sdp->sd_log_freeze));
- finish_wait(&sdp->sd_log_frozen_wait, &wait);
+ atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
+ error = freeze_super(sdp->sd_vfs);
+ if (error) {
+ printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+ gfs2_assert_withdraw(sdp, 0);
+ }
+ queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
+ gfs2_log_flush(sdp, NULL, FREEZE_FLUSH);
}
}
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 7455d2629bcb..8ed1857c1a8d 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -12,6 +12,8 @@
#include "incore.h"
+extern struct workqueue_struct *gfs2_freeze_wq;
+
extern const struct gfs2_glock_operations gfs2_meta_glops;
extern const struct gfs2_glock_operations gfs2_inode_glops;
extern const struct gfs2_glock_operations gfs2_rgrp_glops;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 39e7e9959b74..7a2dbbc0d634 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -97,6 +97,7 @@ struct gfs2_rgrpd {
#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */
#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */
#define GFS2_RDF_ERROR 0x40000000 /* error in rg */
+#define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
struct rb_root rd_rstree; /* multi-block reservation tree */
@@ -587,6 +588,12 @@ enum {
SDF_SKIP_DLM_UNLOCK = 8,
};
+enum gfs2_freeze_state {
+ SFS_UNFROZEN = 0,
+ SFS_STARTING_FREEZE = 1,
+ SFS_FROZEN = 2,
+};
+
#define GFS2_FSNAME_LEN 256
struct gfs2_inum_host {
@@ -684,6 +691,7 @@ struct gfs2_sbd {
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_freeze_gl;
+ struct work_struct sd_freeze_work;
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
@@ -788,6 +796,9 @@ struct gfs2_sbd {
wait_queue_head_t sd_log_flush_wait;
int sd_log_error;
+ atomic_t sd_reserving_log;
+ wait_queue_head_t sd_reserving_log_wait;
+
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
@@ -797,12 +808,8 @@ struct gfs2_sbd {
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
- struct gfs2_holder sd_freeze_root_gh;
- struct gfs2_holder sd_thaw_gh;
- atomic_t sd_log_freeze;
- atomic_t sd_frozen_root;
- wait_queue_head_t sd_frozen_root_wait;
- wait_queue_head_t sd_log_frozen_wait;
+ atomic_t sd_freeze_state;
+ struct mutex sd_freeze_mutex;
char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c4ed823d150e..9054002ebe70 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -596,7 +596,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
- struct dentry *d;
int error, free_vfs_inode = 0;
u32 aflags = 0;
unsigned blocks = 1;
@@ -624,22 +623,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl);
error = PTR_ERR(inode);
if (!IS_ERR(inode)) {
- d = d_splice_alias(inode, dentry);
- error = PTR_ERR(d);
- if (IS_ERR(d)) {
- inode = ERR_CAST(d);
+ if (S_ISDIR(inode->i_mode)) {
+ iput(inode);
+ inode = ERR_PTR(-EISDIR);
goto fail_gunlock;
}
+ d_instantiate(dentry, inode);
error = 0;
if (file) {
- if (S_ISREG(inode->i_mode)) {
- WARN_ON(d != NULL);
+ if (S_ISREG(inode->i_mode))
error = finish_open(file, dentry, gfs2_open_common, opened);
- } else {
- error = finish_no_open(file, d);
- }
- } else {
- dput(d);
+ else
+ error = finish_no_open(file, NULL);
}
gfs2_glock_dq_uninit(ghs);
return error;
@@ -1045,11 +1040,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (error)
return error;
- error = gfs2_dir_check(&dip->i_inode, name, ip);
- if (error)
- return error;
-
- return 0;
+ return gfs2_dir_check(&dip->i_inode, name, ip);
}
/**
@@ -1254,11 +1245,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
if (d != NULL)
dentry = d;
if (dentry->d_inode) {
- if (!(*opened & FILE_OPENED)) {
- if (d == NULL)
- dget(dentry);
- return finish_no_open(file, dentry);
- }
+ if (!(*opened & FILE_OPENED))
+ return finish_no_open(file, d);
dput(d);
return 0;
}
@@ -1622,26 +1610,18 @@ int gfs2_permission(struct inode *inode, int mask)
{
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
int unlock = 0;
- int frozen_root = 0;
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
- inode == sdp->sd_root_dir->d_inode &&
- atomic_inc_not_zero(&sdp->sd_frozen_root)))
- frozen_root = 1;
- else {
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
- if (error)
- return error;
- unlock = 1;
- }
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+ if (error)
+ return error;
+ unlock = 1;
}
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
@@ -1650,8 +1630,6 @@ int gfs2_permission(struct inode *inode, int mask)
error = generic_permission(inode, mask);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
- else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
- wake_up(&sdp->sd_frozen_root_wait);
return error;
}
@@ -1824,29 +1802,19 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct inode *inode = dentry->d_inode;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
int error;
int unlock = 0;
- int frozen_root = 0;
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
- inode == sdp->sd_root_dir->d_inode &&
- atomic_inc_not_zero(&sdp->sd_frozen_root)))
- frozen_root = 1;
- else {
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
- if (error)
- return error;
- unlock = 1;
- }
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+ if (error)
+ return error;
+ unlock = 1;
}
generic_fillattr(inode, stat);
if (unlock)
gfs2_glock_dq_uninit(&gh);
- else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
- wake_up(&sdp->sd_frozen_root_wait);
return 0;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 3966fadbcebd..536e7a6252cd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -339,6 +339,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
+ int ret = 0;
unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
unsigned wanted = blks + reserved_blks;
DEFINE_WAIT(wait);
@@ -362,9 +363,13 @@ retry:
} while(free_blocks <= wanted);
finish_wait(&sdp->sd_log_waitq, &wait);
}
+ atomic_inc(&sdp->sd_reserving_log);
if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
- free_blocks - blks) != free_blocks)
+ free_blocks - blks) != free_blocks) {
+ if (atomic_dec_and_test(&sdp->sd_reserving_log))
+ wake_up(&sdp->sd_reserving_log_wait);
goto retry;
+ }
trace_gfs2_log_blocks(sdp, -blks);
/*
@@ -377,9 +382,11 @@ retry:
down_read(&sdp->sd_log_flush_lock);
if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
gfs2_log_release(sdp, blks);
- return -EROFS;
+ ret = -EROFS;
}
- return 0;
+ if (atomic_dec_and_test(&sdp->sd_reserving_log))
+ wake_up(&sdp->sd_reserving_log_wait);
+ return ret;
}
/**
@@ -652,9 +659,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
u32 hash;
int rw = WRITE_FLUSH_FUA | REQ_META;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);
clear_page(lh);
+ gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+
tail = current_tail(sdp);
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -695,6 +705,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
enum gfs2_flush_type type)
{
struct gfs2_trans *tr;
+ enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
down_write(&sdp->sd_log_flush_lock);
@@ -713,8 +724,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
INIT_LIST_HEAD(&tr->tr_ail1_list);
INIT_LIST_HEAD(&tr->tr_ail2_list);
tr->tr_first = sdp->sd_log_flush_head;
+ if (unlikely (state == SFS_FROZEN))
+ gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
}
+ if (unlikely(state == SFS_FROZEN))
+ gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
@@ -745,8 +760,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
- if (atomic_read(&sdp->sd_log_freeze))
- type = FREEZE_FLUSH;
if (type != NORMAL_FLUSH) {
if (!sdp->sd_log_idle) {
for (;;) {
@@ -763,21 +776,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
}
if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH)
gfs2_log_shutdown(sdp);
- if (type == FREEZE_FLUSH) {
- int error;
-
- atomic_set(&sdp->sd_log_freeze, 0);
- wake_up(&sdp->sd_log_frozen_wait);
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
- LM_ST_SHARED, 0,
- &sdp->sd_thaw_gh);
- if (error) {
- printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
- gfs2_assert_withdraw(sdp, 0);
- }
- else
- gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
- }
+ if (type == FREEZE_FLUSH)
+ atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
trace_gfs2_log_flush(sdp, 0);
@@ -888,7 +888,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
- return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze));
+ return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
}
static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 82b6ac829656..241a399bf83d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -30,6 +30,7 @@
#include "quota.h"
#include "recovery.h"
#include "dir.h"
+#include "glops.h"
struct workqueue_struct *gfs2_control_wq;
@@ -161,9 +162,14 @@ static int __init init_gfs2_fs(void)
if (!gfs2_control_wq)
goto fail_recovery;
+ gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+
+ if (!gfs2_freeze_wq)
+ goto fail_control;
+
gfs2_page_pool = mempool_create_page_pool(64, 0);
if (!gfs2_page_pool)
- goto fail_control;
+ goto fail_freeze;
gfs2_register_debugfs();
@@ -171,6 +177,8 @@ static int __init init_gfs2_fs(void)
return 0;
+fail_freeze:
+ destroy_workqueue(gfs2_freeze_wq);
fail_control:
destroy_workqueue(gfs2_control_wq);
fail_recovery:
@@ -224,6 +232,7 @@ static void __exit exit_gfs2_fs(void)
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
destroy_workqueue(gfs2_control_wq);
+ destroy_workqueue(gfs2_freeze_wq);
list_lru_destroy(&gfs2_qd_lru);
rcu_barrier();
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 455bac77feb5..8633ad328ee2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -129,11 +129,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_rwsem(&sdp->sd_log_flush_lock);
atomic_set(&sdp->sd_log_in_flight, 0);
+ atomic_set(&sdp->sd_reserving_log, 0);
+ init_waitqueue_head(&sdp->sd_reserving_log_wait);
init_waitqueue_head(&sdp->sd_log_flush_wait);
- init_waitqueue_head(&sdp->sd_log_frozen_wait);
- atomic_set(&sdp->sd_log_freeze, 0);
- atomic_set(&sdp->sd_frozen_root, 0);
- init_waitqueue_head(&sdp->sd_frozen_root_wait);
+ atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+ mutex_init(&sdp->sd_freeze_mutex);
return sdp;
}
@@ -760,15 +760,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
gfs2_glock_dq_uninit(&ji_gh);
jindex = 0;
- if (!sdp->sd_args.ar_spectator) {
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
- &sdp->sd_thaw_gh);
- if (error) {
- fs_err(sdp, "can't acquire freeze glock: %d\n", error);
- goto fail_jinode_gh;
- }
- }
- gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
+ INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
return 0;
fail_jinode_gh:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 64b29f7f6b4c..c8b148bbdc8b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1360,13 +1360,8 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
- if (sdp->sd_quota_bitmap) {
- if (is_vmalloc_addr(sdp->sd_quota_bitmap))
- vfree(sdp->sd_quota_bitmap);
- else
- kfree(sdp->sd_quota_bitmap);
- sdp->sd_quota_bitmap = NULL;
- }
+ kvfree(sdp->sd_quota_bitmap);
+ sdp->sd_quota_bitmap = NULL;
}
static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7474c413ffd1..9150207f365c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
- rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+ rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
if (rgd->rd_data > sdp->sd_max_rg_data)
sdp->sd_max_rg_data = rgd->rd_data;
spin_lock(&sdp->sd_rindex_spin);
@@ -955,6 +955,36 @@ fail:
}
/**
+ * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
+ * @sdp: the GFS2 superblock
+ *
+ * The purpose of this function is to select a subset of the resource groups
+ * and mark them as PREFERRED. We do it in such a way that each node prefers
+ * to use a unique set of rgrps to minimize glock contention.
+ */
+static void set_rgrp_preferences(struct gfs2_sbd *sdp)
+{
+ struct gfs2_rgrpd *rgd, *first;
+ int i;
+
+ /* Skip an initial number of rgrps, based on this node's journal ID.
+ That should start each node out on its own set. */
+ rgd = gfs2_rgrpd_get_first(sdp);
+ for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
+ rgd = gfs2_rgrpd_get_next(rgd);
+ first = rgd;
+
+ do {
+ rgd->rd_flags |= GFS2_RDF_PREFERRED;
+ for (i = 0; i < sdp->sd_journals; i++) {
+ rgd = gfs2_rgrpd_get_next(rgd);
+ if (rgd == first)
+ break;
+ }
+ } while (rgd != first);
+}
+
+/**
* gfs2_ri_update - Pull in a new resource index from the disk
* @ip: pointer to the rindex inode
*
@@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ set_rgrp_preferences(sdp);
+
sdp->sd_rindex_uptodate = 1;
return 0;
}
@@ -1891,6 +1923,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
}
/**
+ * fast_to_acquire - determine if a resource group will be fast to acquire
+ *
+ * If this is one of our preferred rgrps, it should be quicker to acquire,
+ * because we tried to set ourselves up as dlm lock master.
+ */
+static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_glock *gl = rgd->rd_gl;
+
+ if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
+ !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags))
+ return 1;
+ if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+ return 1;
+ return 0;
+}
+
+/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
* @ap: the allocation parameters
@@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
rg_locked = 0;
if (skip && skip--)
goto next_rgrp;
- if (!gfs2_rs_active(rs) && (loops < 2) &&
- gfs2_rgrp_used_recently(rs, 1000) &&
- gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
- goto next_rgrp;
+ if (!gfs2_rs_active(rs)) {
+ if (loops == 0 &&
+ !fast_to_acquire(rs->rs_rbm.rgd))
+ goto next_rgrp;
+ if ((loops < 2) &&
+ gfs2_rgrp_used_recently(rs, 1000) &&
+ gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+ goto next_rgrp;
+ }
error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
LM_ST_EXCLUSIVE, flags,
&rs->rs_rgd_gh);
@@ -2195,6 +2251,9 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
trace_gfs2_rs(rs, TRACE_RS_CLAIM);
if (rs->rs_free && !ret)
goto out;
+ /* We used up our block reservation, so we should
+ reserve more blocks next time. */
+ atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
}
__rs_deltree(rs);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8f085f7ade..b104f4af3afd 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -20,6 +20,7 @@
*/
#define RGRP_RSRV_MINBYTES 8
#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+#define RGRP_RSRV_ADDBLKS 64
struct gfs2_rgrpd;
struct gfs2_sbd;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a346f56c4c6d..5b327f837de7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -26,6 +26,7 @@
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
+#include <linux/kernel.h>
#include "gfs2.h"
#include "incore.h"
@@ -399,7 +400,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
- struct gfs2_holder thaw_gh;
+ struct gfs2_holder freeze_gh;
struct gfs2_log_header_host head;
int error;
@@ -408,7 +409,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
return error;
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
- &thaw_gh);
+ &freeze_gh);
if (error)
goto fail_threads;
@@ -434,13 +435,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- gfs2_glock_dq_uninit(&thaw_gh);
+ gfs2_glock_dq_uninit(&freeze_gh);
return 0;
fail:
- thaw_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_uninit(&thaw_gh);
+ freeze_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_uninit(&freeze_gh);
fail_threads:
kthread_stop(sdp->sd_quotad_process);
kthread_stop(sdp->sd_logd_process);
@@ -580,14 +581,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
struct buffer_head *m_bh, *l_bh;
int error;
+ sb_start_write(sb);
error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
&gh);
if (error)
- return error;
+ goto out;
error = gfs2_meta_inode_buffer(m_ip, &m_bh);
if (error)
- goto out;
+ goto out_unlock;
spin_lock(&sdp->sd_statfs_spin);
gfs2_statfs_change_in(m_sc, m_bh->b_data +
@@ -615,8 +617,10 @@ out_bh2:
brelse(l_bh);
out_bh:
brelse(m_bh);
-out:
+out_unlock:
gfs2_glock_dq_uninit(&gh);
+out:
+ sb_end_write(sb);
return error;
}
@@ -643,14 +647,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
struct lfcc *lfcc;
LIST_HEAD(list);
struct gfs2_log_header_host lh;
- struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode);
int error;
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0,
- &sdp->sd_freeze_root_gh);
- if (error)
- return error;
- atomic_set(&sdp->sd_frozen_root, 1);
list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
if (!lfcc) {
@@ -692,11 +690,6 @@ out:
gfs2_glock_dq_uninit(&lfcc->gh);
kfree(lfcc);
}
- if (error) {
- atomic_dec(&sdp->sd_frozen_root);
- wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
- gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
- }
return error;
}
@@ -834,18 +827,14 @@ out:
static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
- struct gfs2_holder thaw_gh;
+ struct gfs2_holder freeze_gh;
int error;
error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
- &thaw_gh);
+ &freeze_gh);
if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return error;
- down_write(&sdp->sd_log_flush_lock);
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- up_write(&sdp->sd_log_flush_lock);
-
kthread_stop(sdp->sd_quotad_process);
kthread_stop(sdp->sd_logd_process);
@@ -853,11 +842,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_statfs_sync(sdp->sd_vfs, 0);
+ down_write(&sdp->sd_log_flush_lock);
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ up_write(&sdp->sd_log_flush_lock);
+
gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH);
+ wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
- if (thaw_gh.gh_gl)
- gfs2_glock_dq_uninit(&thaw_gh);
+ if (freeze_gh.gh_gl)
+ gfs2_glock_dq_uninit(&freeze_gh);
gfs2_quota_cleanup(sdp);
@@ -943,11 +937,41 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
struct gfs2_sbd *sdp = sb->s_fs_info;
gfs2_quota_sync(sb, -1);
- if (wait && sdp && !atomic_read(&sdp->sd_log_freeze))
+ if (wait && sdp)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
return 0;
}
+void gfs2_freeze_func(struct work_struct *work)
+{
+ int error;
+ struct gfs2_holder freeze_gh;
+ struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
+ struct super_block *sb = sdp->sd_vfs;
+
+ atomic_inc(&sb->s_active);
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
+ &freeze_gh);
+ if (error) {
+ printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
+ gfs2_assert_withdraw(sdp, 0);
+ }
+ else {
+ atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+ error = thaw_super(sb);
+ if (error) {
+ printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
+ error);
+ gfs2_assert_withdraw(sdp, 0);
+ }
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ freeze_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_uninit(&freeze_gh);
+ }
+ deactivate_super(sb);
+ return;
+}
+
/**
* gfs2_freeze - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
@@ -957,10 +981,16 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
static int gfs2_freeze(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- int error;
+ int error = 0;
- if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
- return -EINVAL;
+ mutex_lock(&sdp->sd_freeze_mutex);
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+ goto out;
+
+ if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+ error = -EINVAL;
+ goto out;
+ }
for (;;) {
error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
@@ -980,7 +1010,10 @@ static int gfs2_freeze(struct super_block *sb)
fs_err(sdp, "retrying...\n");
msleep(1000);
}
- return 0;
+ error = 0;
+out:
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ return error;
}
/**
@@ -993,10 +1026,15 @@ static int gfs2_unfreeze(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
+ mutex_lock(&sdp->sd_freeze_mutex);
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+ sdp->sd_freeze_gh.gh_gl == NULL) {
+ mutex_unlock(&sdp->sd_freeze_mutex);
+ return 0;
+ }
+
gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
- atomic_dec(&sdp->sd_frozen_root);
- wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
- gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
+ mutex_unlock(&sdp->sd_freeze_mutex);
return 0;
}
@@ -1618,8 +1656,8 @@ const struct super_operations gfs2_super_ops = {
.evict_inode = gfs2_evict_inode,
.put_super = gfs2_put_super,
.sync_fs = gfs2_sync_fs,
- .freeze_fs = gfs2_freeze,
- .unfreeze_fs = gfs2_unfreeze,
+ .freeze_super = gfs2_freeze,
+ .thaw_super = gfs2_unfreeze,
.statfs = gfs2_statfs,
.remount_fs = gfs2_remount_fs,
.drop_inode = gfs2_drop_inode,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 90e3322ffa10..73c97dccae21 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -45,6 +45,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
struct buffer_head *l_bh);
extern int gfs2_statfs_sync(struct super_block *sb, int type);
+extern void gfs2_freeze_func(struct work_struct *work);
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 42bfd3361979..88bff2430669 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -89,14 +89,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr = current->journal_info;
s64 nbuf;
+ int alloced = tr->tr_alloced;
+
BUG_ON(!tr);
current->journal_info = NULL;
if (!tr->tr_touched) {
gfs2_log_release(sdp, tr->tr_reserved);
- if (tr->tr_alloced)
+ if (alloced) {
kfree(tr);
- sb_end_intwrite(sdp->sd_vfs);
+ sb_end_intwrite(sdp->sd_vfs);
+ }
return;
}
@@ -109,13 +112,14 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
gfs2_print_trans(tr);
gfs2_log_commit(sdp, tr);
- if (tr->tr_alloced && !tr->tr_attached)
+ if (alloced && !tr->tr_attached)
kfree(tr);
up_read(&sdp->sd_log_flush_lock);
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
- sb_end_intwrite(sdp->sd_vfs);
+ if (alloced)
+ sb_end_intwrite(sdp->sd_vfs);
}
static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
@@ -192,6 +196,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
struct gfs2_meta_header *mh;
struct gfs2_trans *tr;
+ enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
tr = current->journal_info;
tr->tr_touched = 1;
@@ -205,6 +210,10 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
(unsigned long long)bd->bd_bh->b_blocknr);
BUG();
}
+ if (unlikely(state == SFS_FROZEN)) {
+ printk(KERN_INFO "GFS2:adding buf while frozen\n");
+ gfs2_assert_withdraw(sdp, 0);
+ }
gfs2_pin(sdp, bd->bd_bh);
mh->__pad0 = cpu_to_be64(0);
mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad36192..77c9a7812542 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -518,10 +518,12 @@ static int ioctl_fsfreeze(struct file *filp)
return -EPERM;
/* If filesystem doesn't support freeze feature, return. */
- if (sb->s_op->freeze_fs == NULL)
+ if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL)
return -EOPNOTSUPP;
/* Freeze */
+ if (sb->s_op->freeze_super)
+ return sb->s_op->freeze_super(sb);
return freeze_super(sb);
}
@@ -533,6 +535,8 @@ static int ioctl_fsthaw(struct file *filp)
return -EPERM;
/* Thaw */
+ if (sb->s_op->thaw_super)
+ return sb->s_op->thaw_super(sb);
return thaw_super(sb);
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index fe839b915116..d67a16f2a45d 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -174,27 +174,6 @@ struct iso9660_options{
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hash_common(struct qstr *qstr, int ms)
-{
- const char *name;
- int len;
-
- len = qstr->len;
- name = qstr->name;
- if (ms) {
- while (len && name[len-1] == '.')
- len--;
- }
-
- qstr->hash = full_name_hash(name, len);
-
- return 0;
-}
-
-/*
- * Compute the hash for the isofs name corresponding to the dentry.
- */
-static int
isofs_hashi_common(struct qstr *qstr, int ms)
{
const char *name;
@@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
}
#ifdef CONFIG_JOLIET
+/*
+ * Compute the hash for the isofs name corresponding to the dentry.
+ */
+static int
+isofs_hash_common(struct qstr *qstr, int ms)
+{
+ const char *name;
+ int len;
+
+ len = qstr->len;
+ name = qstr->name;
+ if (ms) {
+ while (len && name[len-1] == '.')
+ len--;
+ }
+
+ qstr->hash = full_name_hash(name, len);
+
+ return 0;
+}
+
static int
isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
{
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e4dc74713a43..1df94fabe4eb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
journal->j_chksum_driver = NULL;
return 0;
}
- }
- /* Precompute checksum seed for all metadata */
- if (jbd2_journal_has_csum_v2or3(journal))
+ /* Precompute checksum seed for all metadata */
journal->j_csum_seed = jbd2_chksum(journal, ~0,
sb->s_uuid,
sizeof(sb->s_uuid));
+ }
}
/* If enabling v1 checksums, downgrade superblock */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 13db95f54176..56598742dde4 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
static LIST_HEAD(nlm_blocked);
static DEFINE_SPINLOCK(nlm_blocked_lock);
-#ifdef LOCKD_DEBUG
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
{
/*
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 5228f201d3d5..77fec6a55f57 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
loff_t offset = header->args.offset;
size_t count = header->args.count;
struct page **pages = header->args.pages;
- int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
unsigned int pg_len;
struct blk_plug plug;
int i;
@@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
/* Optimize common case that writes from 0 to end of file */
end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
- if (end != NFS_I(inode)->npages) {
+ if (end != inode->i_mapping->nrpages) {
rcu_read_lock();
end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
rcu_read_unlock();
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index e966c023b1b7..acbf9ca4018c 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
+ mutex_lock(&nn->bl_mutex);
bl_pipe_msg.bl_wq = &nn->bl_wq;
b->simple.len += 4; /* single volume */
if (b->simple.len > PAGE_SIZE)
- return -EIO;
+ goto out_unlock;
memset(msg, 0, sizeof(*msg));
msg->len = sizeof(*bl_msg) + b->simple.len;
msg->data = kzalloc(msg->len, gfp_mask);
if (!msg->data)
- goto out;
+ goto out_free_data;
bl_msg = msg->data;
bl_msg->type = BL_DEVICE_MOUNT,
@@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
if (rc < 0) {
remove_wait_queue(&nn->bl_wq, &wq);
- goto out;
+ goto out_free_data;
}
set_current_state(TASK_UNINTERRUPTIBLE);
@@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
if (reply->status != BL_DEVICE_REQUEST_PROC) {
printk(KERN_WARNING "%s failed to decode device: %d\n",
__func__, reply->status);
- goto out;
+ goto out_free_data;
}
dev = MKDEV(reply->major, reply->minor);
-out:
+out_free_data:
kfree(msg->data);
+out_unlock:
+ mutex_unlock(&nn->bl_mutex);
return dev;
}
@@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net)
struct nfs_net *nn = net_generic(net, nfs_net_id);
struct dentry *dentry;
+ mutex_init(&nn->bl_mutex);
init_waitqueue_head(&nn->bl_wq);
nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
if (IS_ERR(nn->bl_device_pipe))
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 73466b934090..e36a9d78ea49 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
goto out_iput;
res->size = i_size_read(inode);
res->change_attr = delegation->change_attr;
- if (nfsi->npages != 0)
+ if (nfsi->nrequests != 0)
res->change_attr++;
res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5853f53db732..7f3f60641344 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -125,6 +125,8 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
+ if (!nfs4_valid_open_stateid(state))
+ continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
get_nfs_open_context(ctx);
@@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
{
int res = 0;
- res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ res = nfs4_proc_delegreturn(inode,
+ delegation->cred,
+ &delegation->stateid,
+ issync);
nfs_free_delegation(delegation);
return res;
}
@@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
- int err;
+ int err = 0;
if (delegation == NULL)
return 0;
do {
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ break;
err = nfs_delegation_claim_opens(inode, &delegation->stateid);
if (!issync || err != -EAGAIN)
break;
@@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
rcu_read_unlock();
}
+static void nfs_revoke_delegation(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+ nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
+ }
+ rcu_read_unlock();
+}
+
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
+ nfs_revoke_delegation(inode);
delegation = nfs_inode_detach_delegation(inode);
if (delegation) {
nfs_inode_find_state_and_recover(inode, &delegation->stateid);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5c1cce39297f..e3c20a3ccc93 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
NFS_DELEGATION_RETURNING,
+ NFS_DELEGATION_REVOKED,
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 06e8cfcbb670..6e62155abf26 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
case -ENOENT:
d_drop(dentry);
d_add(dentry, NULL);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
case -EISDIR:
case -ENOTDIR:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 20cffc830468..10bf07280f4a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+ nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
if (dreq->l_ctx != NULL)
nfs_put_lock_context(dreq->l_ctx);
if (dreq->ctx != NULL)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 46fab1cb455a..7afb52f6a25a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 9bb806a76d99..bfecac781f19 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds)
ifdebug(FACILITY)
print_ds(ds);
- if (ds->ds_clp)
- nfs_put_client(ds->ds_clp);
+ nfs_put_client(ds->ds_clp);
while (!list_empty(&ds->ds_addrs)) {
da = list_first_entry(&ds->ds_addrs,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 3ef01f0ba0bc..d63bea8bbfbb 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
if (!fscache_maybe_release_page(cookie, page, gfp))
return 0;
- nfs_add_fscache_stats(page->mapping->host,
- NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+ nfs_inc_fscache_stats(page->mapping->host,
+ NFSIOS_FSCACHE_PAGES_UNCACHED);
}
return 1;
@@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
BUG_ON(!PageLocked(page));
fscache_uncache_page(cookie, page);
- nfs_add_fscache_stats(page->mapping->host,
- NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+ nfs_inc_fscache_stats(page->mapping->host,
+ NFSIOS_FSCACHE_PAGES_UNCACHED);
}
/*
@@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
case 0: /* read BIO submitted (page in fscache) */
dfprintk(FSCACHE,
"NFS: readpage_from_fscache: BIO submitted\n");
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK);
return ret;
case -ENOBUFS: /* inode not in cache */
case -ENODATA: /* page not in cache */
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
dfprintk(FSCACHE,
"NFS: readpage_from_fscache %d\n", ret);
return 1;
default:
dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
}
return ret;
}
@@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
if (ret != 0) {
fscache_uncache_page(nfs_i_fscache(inode), page);
- nfs_add_fscache_stats(inode,
- NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+ nfs_inc_fscache_stats(inode,
+ NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED);
} else {
- nfs_add_fscache_stats(inode,
- NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
+ nfs_inc_fscache_stats(inode,
+ NFSIOS_FSCACHE_PAGES_WRITTEN_OK);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6388a59f2add..4bffe637ea32 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode)
nfs_zap_caches_locked(inode);
spin_unlock(&inode->i_lock);
}
+EXPORT_SYMBOL_GPL(nfs_zap_caches);
void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
{
@@ -626,7 +627,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
- int err;
+ int err = 0;
trace_nfs_getattr_enter(inode);
/* Flush out writes to the server in order to update c/mtime. */
@@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
- && nfsi->npages == 0) {
+ && nfsi->nrequests == 0) {
i_size_write(inode, nfs_size_to_loff_t(fattr->size));
ret |= NFS_INO_INVALID_ATTR;
}
@@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
- if (cur_size != new_isize && nfsi->npages == 0)
+ if (cur_size != new_isize && nfsi->nrequests == 0)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
}
@@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
- if ((nfsi->npages == 0) || new_isize > cur_isize) {
+ if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
invalid &= ~NFS_INO_REVAL_PAGECACHE;
@@ -1784,7 +1785,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_LIST_HEAD(&nfsi->commit_info.list);
- nfsi->npages = 0;
+ nfsi->nrequests = 0;
nfsi->commit_info.ncommit = 0;
atomic_set(&nfsi->commit_info.rpcs_out, 0);
atomic_set(&nfsi->silly_count, 1);
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index c5832487c456..0cb806fbd4c4 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
{
this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
}
+static inline void nfs_inc_fscache_stats(struct inode *inode,
+ enum nfs_stat_fscachecounters stat)
+{
+ this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]);
+}
#endif
static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index ef221fb8a183..f0e06e4acbef 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -19,6 +19,7 @@ struct nfs_net {
struct rpc_pipe *bl_device_pipe;
struct bl_dev_msg bl_mount_reply;
wait_queue_head_t bl_wq;
+ struct mutex bl_mutex;
struct list_head nfs_client_list;
struct list_head nfs_volume_list;
#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index d10333a197bf..7afb8947dfdf 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -6,6 +6,8 @@
#define __LINUX_FS_NFS_NFS4_2_H
/* nfs4.2proc.c */
+int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
/* nfs4.2xdr.h */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 0886f1db5917..cb170722769c 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
return ret;
}
+static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(filep);
+ struct nfs42_falloc_args args = {
+ .falloc_fh = NFS_FH(inode),
+ .falloc_offset = offset,
+ .falloc_length = len,
+ };
+ struct nfs42_falloc_res res;
+ struct nfs_server *server = NFS_SERVER(inode);
+ int status;
+
+ msg->rpc_argp = &args;
+ msg->rpc_resp = &res;
+
+ status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+ if (status)
+ return status;
+
+ return nfs4_call_sync(server->client, server, msg,
+ &args.seq_args, &res.seq_res, 0);
+}
+
+static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
+ loff_t offset, loff_t len)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct nfs4_exception exception = { };
+ int err;
+
+ do {
+ err = _nfs42_proc_fallocate(msg, filep, offset, len);
+ if (err == -ENOTSUPP)
+ return -EOPNOTSUPP;
+ err = nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
+int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE],
+ };
+ struct inode *inode = file_inode(filep);
+ int err;
+
+ if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
+ return -EOPNOTSUPP;
+
+ err = nfs42_proc_fallocate(&msg, filep, offset, len);
+ if (err == -EOPNOTSUPP)
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+ return err;
+}
+
+int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE],
+ };
+ struct inode *inode = file_inode(filep);
+ int err;
+
+ if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
+ return -EOPNOTSUPP;
+
+ err = nfs42_proc_fallocate(&msg, filep, offset, len);
+ if (err == -EOPNOTSUPP)
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+ return err;
+}
+
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
@@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
struct nfs_server *server = NFS_SERVER(inode);
int status;
- if (!(server->caps & NFS_CAP_SEEK))
+ if (!nfs_server_capable(inode, NFS_CAP_SEEK))
return -ENOTSUPP;
status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index c90469b604b8..038a7e1521fa 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -4,6 +4,15 @@
#ifndef __LINUX_FS_NFS_NFS4_2XDR_H
#define __LINUX_FS_NFS_NFS4_2XDR_H
+#define encode_fallocate_maxsz (encode_stateid_maxsz + \
+ 2 /* offset */ + \
+ 2 /* length */)
+#define encode_allocate_maxsz (op_encode_hdr_maxsz + \
+ encode_fallocate_maxsz)
+#define decode_allocate_maxsz (op_decode_hdr_maxsz)
+#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
+ encode_fallocate_maxsz)
+#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
#define encode_seek_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + \
2 /* offset */ + \
@@ -14,6 +23,18 @@
2 /* offset */ + \
2 /* length */)
+#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_allocate_maxsz)
+#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_allocate_maxsz)
+#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_deallocate_maxsz)
+#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_deallocate_maxsz)
#define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_seek_maxsz)
@@ -22,6 +43,30 @@
decode_seek_maxsz)
+static void encode_fallocate(struct xdr_stream *xdr,
+ struct nfs42_falloc_args *args)
+{
+ encode_nfs4_stateid(xdr, &args->falloc_stateid);
+ encode_uint64(xdr, args->falloc_offset);
+ encode_uint64(xdr, args->falloc_length);
+}
+
+static void encode_allocate(struct xdr_stream *xdr,
+ struct nfs42_falloc_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr);
+ encode_fallocate(xdr, args);
+}
+
+static void encode_deallocate(struct xdr_stream *xdr,
+ struct nfs42_falloc_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr);
+ encode_fallocate(xdr, args);
+}
+
static void encode_seek(struct xdr_stream *xdr,
struct nfs42_seek_args *args,
struct compound_hdr *hdr)
@@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr,
}
/*
+ * Encode ALLOCATE request
+ */
+static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_falloc_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->falloc_fh, &hdr);
+ encode_allocate(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
+ * Encode DEALLOCATE request
+ */
+static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_falloc_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->falloc_fh, &hdr);
+ encode_deallocate(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode SEEK request
*/
static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
@@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
encode_nops(&hdr);
}
+static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
+{
+ return decode_op_hdr(xdr, OP_ALLOCATE);
+}
+
+static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
+{
+ return decode_op_hdr(xdr, OP_DEALLOCATE);
+}
+
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
{
int status;
@@ -73,6 +164,54 @@ out_overflow:
}
/*
+ * Decode ALLOCATE request
+ */
+static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_falloc_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_allocate(xdr, res);
+out:
+ return status;
+}
+
+/*
+ * Decode DEALLOCATE request
+ */
+static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_falloc_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_deallocate(xdr, res);
+out:
+ return status;
+}
+
+/*
* Decode SEEK request
*/
static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index be6cac37ea10..a08178764cf9 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
/* nfs4proc.c */
+extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
struct rpc_message *, struct nfs4_sequence_args *,
struct nfs4_sequence_res *, int);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index ffdb28d86cf8..03311259b0c4 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp)
*/
static int nfs4_init_callback(struct nfs_client *clp)
{
+ struct rpc_xprt *xprt;
int error;
- if (clp->rpc_ops->version == 4) {
- struct rpc_xprt *xprt;
+ xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
- xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
-
- if (nfs4_has_session(clp)) {
- error = xprt_setup_backchannel(xprt,
- NFS41_BC_MIN_CALLBACKS);
- if (error < 0)
- return error;
- }
-
- error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
- if (error < 0) {
- dprintk("%s: failed to start callback. Error = %d\n",
- __func__, error);
+ if (nfs4_has_session(clp)) {
+ error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
+ if (error < 0)
return error;
- }
- __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
}
+
+ error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
+ if (error < 0) {
+ dprintk("%s: failed to start callback. Error = %d\n",
+ __func__, error);
+ return error;
+ }
+ __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+
return 0;
}
@@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
atomic_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
- if (prev)
- nfs_put_client(prev);
+ nfs_put_client(prev);
prev = pos;
status = nfs_wait_client_init_complete(pos);
@@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
atomic_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
- if (prev)
- nfs_put_client(prev);
+ nfs_put_client(prev);
prev = pos;
status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
@@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
/* No match found. The server lost our clientid */
out:
- if (prev)
- nfs_put_client(prev);
+ nfs_put_client(prev);
dprintk("NFS: <-- %s status = %d\n", __func__, status);
return status;
}
@@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
atomic_inc(&pos->cl_count);
spin_unlock(&nn->nfs_client_lock);
- if (prev)
- nfs_put_client(prev);
+ nfs_put_client(prev);
prev = pos;
status = nfs_wait_client_init_complete(pos);
@@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
/* No matching nfs_client found. */
spin_unlock(&nn->nfs_client_lock);
dprintk("NFS: <-- %s status = %d\n", __func__, status);
- if (prev)
- nfs_put_client(prev);
+ nfs_put_client(prev);
return status;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index c51fb4db9bfe..8b46389c4c5b 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -3,6 +3,8 @@
*
* Copyright (C) 1992 Rick Sladkey
*/
+#include <linux/fs.h>
+#include <linux/falloc.h>
#include <linux/nfs_fs.h>
#include "internal.h"
#include "fscache.h"
@@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
return nfs_file_llseek(filep, offset, whence);
}
}
+
+static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len)
+{
+ struct inode *inode = file_inode(filep);
+ long ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)))
+ return -EOPNOTSUPP;
+
+ ret = inode_newsize_ok(inode, offset + len);
+ if (ret < 0)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ ret = nfs42_proc_deallocate(filep, offset, len);
+ else
+ ret = nfs42_proc_allocate(filep, offset, len);
+ mutex_unlock(&inode->i_mutex);
+
+ nfs_zap_caches(inode);
+ return ret;
+}
#endif /* CONFIG_NFS_V4_2 */
const struct file_operations nfs4_file_operations = {
@@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = {
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
.splice_write = iter_file_splice_write,
+#ifdef CONFIG_NFS_V4_2
+ .fallocate = nfs42_fallocate,
+#endif /* CONFIG_NFS_V4_2 */
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 405bd95c1f58..e7f8d5ff2581 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -158,8 +158,6 @@ static int nfs4_map_errors(int err)
return -EACCES;
case -NFS4ERR_MINOR_VERS_MISMATCH:
return -EPROTONOSUPPORT;
- case -NFS4ERR_ACCESS:
- return -EACCES;
case -NFS4ERR_FILE_OPEN:
return -EBUSY;
default:
@@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
-static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
@@ -370,11 +368,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) {
- nfs_remove_bad_delegation(inode);
- exception->retry = 1;
- break;
- }
if (state == NULL)
break;
ret = nfs4_schedule_stateid_recovery(server, state);
@@ -1654,7 +1647,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
nfs_inode_find_state_and_recover(state->inode,
stateid);
nfs4_schedule_stateid_recovery(server, state);
- return 0;
+ return -EAGAIN;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
set_bit(NFS_DELEGATED_STATE, &state->flags);
@@ -2109,46 +2102,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret;
}
+static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
+{
+ nfs_remove_bad_delegation(state->inode);
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+ write_sequnlock(&state->seqlock);
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+}
+
+static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+{
+ if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
+ nfs_finish_clear_delegation_stateid(state);
+}
+
+static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ /* NFSv4.0 doesn't allow for delegation recovery on open expire */
+ nfs40_clear_delegation_stateid(state);
+ return nfs4_open_expired(sp, state);
+}
+
#if defined(CONFIG_NFS_V4_1)
-static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
+static void nfs41_check_delegation_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- nfs4_stateid *stateid = &state->stateid;
+ nfs4_stateid stateid;
struct nfs_delegation *delegation;
- struct rpc_cred *cred = NULL;
- int status = -NFS4ERR_BAD_STATEID;
-
- /* If a state reset has been done, test_stateid is unneeded */
- if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- return;
+ struct rpc_cred *cred;
+ int status;
/* Get the delegation credential for use by test/free_stateid */
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
- if (delegation != NULL &&
- nfs4_stateid_match(&delegation->stateid, stateid)) {
- cred = get_rpccred(delegation->cred);
- rcu_read_unlock();
- status = nfs41_test_stateid(server, stateid, cred);
- trace_nfs4_test_delegation_stateid(state, NULL, status);
- } else
+ if (delegation == NULL) {
rcu_read_unlock();
+ return;
+ }
+
+ nfs4_stateid_copy(&stateid, &delegation->stateid);
+ cred = get_rpccred(delegation->cred);
+ rcu_read_unlock();
+ status = nfs41_test_stateid(server, &stateid, cred);
+ trace_nfs4_test_delegation_stateid(state, NULL, status);
if (status != NFS_OK) {
/* Free the stateid unless the server explicitly
* informs us the stateid is unrecognized. */
if (status != -NFS4ERR_BAD_STATEID)
- nfs41_free_stateid(server, stateid, cred);
- nfs_remove_bad_delegation(state->inode);
-
- write_seqlock(&state->seqlock);
- nfs4_stateid_copy(&state->stateid, &state->open_stateid);
- write_sequnlock(&state->seqlock);
- clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ nfs41_free_stateid(server, &stateid, cred);
+ nfs_finish_clear_delegation_stateid(state);
}
- if (cred != NULL)
- put_rpccred(cred);
+ put_rpccred(cred);
}
/**
@@ -2192,7 +2199,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
{
int status;
- nfs41_clear_delegation_stateid(state);
+ nfs41_check_delegation_stateid(state);
status = nfs41_check_open_stateid(state);
if (status != NFS_OK)
status = nfs4_open_expired(sp, state);
@@ -2231,19 +2238,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
ret = _nfs4_proc_open(opendata);
- if (ret != 0) {
- if (ret == -ENOENT) {
- dentry = opendata->dentry;
- if (dentry->d_inode)
- d_delete(dentry);
- else if (d_unhashed(dentry))
- d_add(dentry, NULL);
-
- nfs_set_verifier(dentry,
- nfs_save_change_attribute(opendata->dir->d_inode));
- }
+ if (ret != 0)
goto out;
- }
state = nfs4_opendata_to_nfs4_state(opendata);
ret = PTR_ERR(state);
@@ -4841,9 +4837,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- if (state == NULL)
- break;
- nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
@@ -7709,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
dprintk("--> %s\n", __func__);
+ /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+ pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
nfs4_layoutget_release(lgp);
@@ -7721,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
lgp->res.seq_res.sr_slot = NULL;
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
- /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
- pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return ERR_CAST(task);
@@ -8341,7 +8334,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
- .recover_open = nfs4_open_expired,
+ .recover_open = nfs40_open_expired,
.recover_lock = nfs4_lock_expired,
.establish_clid = nfs4_init_clientid,
};
@@ -8408,8 +8401,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
| NFS_CAP_CHANGE_ATTR
| NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41
- | NFS_CAP_ATOMIC_OPEN_V1
- | NFS_CAP_SEEK,
+ | NFS_CAP_ATOMIC_OPEN_V1,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -8431,7 +8423,10 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_CHANGE_ATTR
| NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41
- | NFS_CAP_ATOMIC_OPEN_V1,
+ | NFS_CAP_ATOMIC_OPEN_V1
+ | NFS_CAP_ALLOCATE
+ | NFS_CAP_DEALLOCATE
+ | NFS_CAP_SEEK,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 206c08a60c7f..cb4376b78ed9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int);
XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
1 /* sc_prog */ + \
- XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
- XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
+ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
+ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
1) /* sc_cb_ident */
#define decode_setclientid_maxsz \
(op_decode_hdr_maxsz + \
- 2 + \
- 1024) /* large value for CLID_INUSE */
+ 2 /* clientid */ + \
+ XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
+ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
+ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
#define encode_setclientid_confirm_maxsz \
(op_encode_hdr_maxsz + \
3 + (NFS4_VERIFIER_SIZE >> 2))
@@ -7394,6 +7396,8 @@ struct rpc_procinfo nfs4_procedures[] = {
#endif /* CONFIG_NFS_V4_1 */
#ifdef CONFIG_NFS_V4_2
PROC(SEEK, enc_seek, dec_seek),
+ PROC(ALLOCATE, enc_allocate, dec_allocate),
+ PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
#endif /* CONFIG_NFS_V4_2 */
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ed0db61f8543..2b5e769beb16 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
static inline void
nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
{
+ struct inode *inode;
WARN_ON_ONCE(prev == req);
if (!prev) {
@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
* nfs_page_group_destroy is called */
kref_get(&req->wb_head->wb_kref);
- /* grab extra ref if head request has extra ref from
- * the write/commit path to handle handoff between write
- * and commit lists */
+ /* grab extra ref and bump the request count if head request
+ * has extra ref from the write/commit path to handle handoff
+ * between write and commit lists. */
if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+ inode = page_file_mapping(req->wb_page)->host;
set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->nrequests++;
+ spin_unlock(&inode->i_lock);
}
}
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index beff2769c5c5..c91a4799c562 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page)
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_CACHE_SIZE, page_file_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+ nfs_inc_stats(inode, NFSIOS_READPAGES);
/*
* Try to flush any pending writes to the file..
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 12493846a2d3..af3af685a9e3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
int ret;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
- nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+ nfs_inc_stats(inode, NFSIOS_WRITEPAGES);
nfs_pageio_cond_complete(pgio, page_file_index(page));
ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
nfs_lock_request(req);
spin_lock(&inode->i_lock);
- if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+ if (!nfsi->nrequests &&
+ NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
inode->i_version++;
/*
* Swap-space should not get truncated. Hence no need to plug the race
@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req);
}
- nfsi->npages++;
+ nfsi->nrequests++;
/* this a head request for a page group - mark it as having an
- * extra reference so sub groups can follow suit */
+ * extra reference so sub groups can follow suit.
+ * This flag also informs pgio layer when to bump nrequests when
+ * adding subrequests. */
WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
@@ -709,14 +712,16 @@ static void nfs_inode_remove_request(struct nfs_page *req)
wake_up_page(head->wb_page, PG_private);
clear_bit(PG_MAPPED, &head->wb_flags);
}
- nfsi->npages--;
+ nfsi->nrequests--;
+ spin_unlock(&inode->i_lock);
+ } else {
+ spin_lock(&inode->i_lock);
+ nfsi->nrequests--;
spin_unlock(&inode->i_lock);
}
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
nfs_release_request(req);
- else
- WARN_ON_ONCE(1);
}
static void
@@ -1737,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
- if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
+ if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
goto out_mark_dirty;
/* don't wait for the COMMIT response */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ed2b1151b171..7cbdf1b2e4ab 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
{
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
- dprintk("%s slot is busy\n", __func__);
- return false;
+ /* Race breaker */
+ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ dprintk("%s slot is busy\n", __func__);
+ return false;
+ }
+ rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
return true;
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 747f3b95bd11..33a46a8dfaf7 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void);
(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
- (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL)
+#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL
#else
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0
+#define NFSD4_2_SECURITY_ATTRS 0
#endif
+#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+ NFSD4_2_SECURITY_ATTRS)
+
static inline u32 nfsd_suppattrs0(u32 minorversion)
{
return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9d3e9c50066a..89326acd4561 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
&fsnotify_mark_srcu);
}
+ /*
+ * We need to merge inode & vfsmount mark lists so that inode mark
+ * ignore masks are properly reflected for mount mark notifications.
+ * That's why this traversal is so complicated...
+ */
while (inode_node || vfsmount_node) {
- inode_group = vfsmount_group = NULL;
+ inode_group = NULL;
+ inode_mark = NULL;
+ vfsmount_group = NULL;
+ vfsmount_mark = NULL;
if (inode_node) {
inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
vfsmount_group = vfsmount_mark->group;
}
- if (inode_group > vfsmount_group) {
- /* handle inode */
- ret = send_to_group(to_tell, inode_mark, NULL, mask,
- data, data_is, cookie, file_name);
- /* we didn't use the vfsmount_mark */
- vfsmount_group = NULL;
- } else if (vfsmount_group > inode_group) {
- ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
- data, data_is, cookie, file_name);
- inode_group = NULL;
- } else {
- ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
- mask, data, data_is, cookie,
- file_name);
+ if (inode_group && vfsmount_group) {
+ int cmp = fsnotify_compare_groups(inode_group,
+ vfsmount_group);
+ if (cmp > 0) {
+ inode_group = NULL;
+ inode_mark = NULL;
+ } else if (cmp < 0) {
+ vfsmount_group = NULL;
+ vfsmount_mark = NULL;
+ }
}
+ ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+ data, data_is, cookie, file_name);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c4cfe1..3b68b0ae0a97 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
+/* compare two groups for sorting of marks lists */
+extern int fsnotify_compare_groups(struct fsnotify_group *a,
+ struct fsnotify_group *b);
+
extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
__u32 mask);
/* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index e8497144b323..dfbf5447eea4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
{
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
+ int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group->priority < lmark->group->priority)
- continue;
-
- if ((mark->group->priority == lmark->group->priority) &&
- (mark->group < lmark->group))
+ cmp = fsnotify_compare_groups(lmark->group, mark->group);
+ if (cmp < 0)
continue;
hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index daf76652fe58..283aa312d745 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
struct fsnotify_event *kevent;
char __user *start;
int ret;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
start = buf;
group = file->private_data;
+ add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
-
mutex_lock(&group->notification_mutex);
kevent = get_one_event(group, count);
mutex_unlock(&group->notification_mutex);
@@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
if (start != buf)
break;
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
+ remove_wait_queue(&group->notification_waitq, &wait);
- finish_wait(&group->notification_waitq, &wait);
if (start != buf && ret != -EFAULT)
ret = buf - start;
return ret;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa08e78..34c38fabf514 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
}
/*
+ * Sorting function for lists of fsnotify marks.
+ *
+ * Fanotify supports different notification classes (reflected as priority of
+ * notification group). Events shall be passed to notification groups in
+ * decreasing priority order. To achieve this marks in notification lists for
+ * inodes and vfsmounts are sorted so that priorities of corresponding groups
+ * are descending.
+ *
+ * Furthermore correct handling of the ignore mask requires processing inode
+ * and vfsmount marks of each group together. Using the group address as
+ * further sort criterion provides a unique sorting order and thus we can
+ * merge inode and vfsmount lists of marks in linear time and find groups
+ * present in both lists.
+ *
+ * A return value of 1 signifies that b has priority over a.
+ * A return value of 0 signifies that the two marks have to be handled together.
+ * A return value of -1 signifies that a has priority over b.
+ */
+int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+{
+ if (a == b)
+ return 0;
+ if (!a)
+ return 1;
+ if (!b)
+ return -1;
+ if (a->priority < b->priority)
+ return 1;
+ if (a->priority > b->priority)
+ return -1;
+ if (a < b)
+ return 1;
+ return -1;
+}
+
+/*
* Attach an initialized mark to a given group and fs object.
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8376b1..faefa72a11eb 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
struct mount *m = real_mount(mnt);
struct fsnotify_mark *lmark, *last = NULL;
int ret = 0;
+ int cmp;
mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
goto out;
}
- if (mark->group->priority < lmark->group->priority)
- continue;
-
- if ((mark->group->priority == lmark->group->priority) &&
- (mark->group < lmark->group))
+ cmp = fsnotify_compare_groups(lmark->group, mark->group);
+ if (cmp < 0)
continue;
hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 97de0fbd9f78..a96044004064 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
size_t veclen, size_t total)
{
int ret;
- struct msghdr msg;
+ struct msghdr msg = {.msg_flags = 0,};
if (sock == NULL) {
ret = -EINVAL;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index e60125976873..34355818a2e0 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -1,4 +1,4 @@
-config OVERLAYFS_FS
+config OVERLAY_FS
tristate "Overlay filesystem support"
help
An overlay filesystem combines two filesystems - an 'upper' filesystem
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 8f91889480d0..900daed3e91d 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -2,6 +2,6 @@
# Makefile for the overlay filesystem.
#
-obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o
+obj-$(CONFIG_OVERLAY_FS) += overlay.o
-overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o
+overlay-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 15cd91ad9940..8ffc4b980f1b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -284,8 +284,7 @@ out:
return ERR_PTR(err);
}
-static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
- enum ovl_path_type type)
+static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
{
int err;
struct dentry *ret = NULL;
@@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry,
err = ovl_check_empty_dir(dentry, &list);
if (err)
ret = ERR_PTR(err);
- else if (type == OVL_PATH_MERGE)
- ret = ovl_clear_empty(dentry, &list);
+ else {
+ /*
+ * If no upperdentry then skip clearing whiteouts.
+ *
+ * Can race with copy-up, since we don't hold the upperdir
+ * mutex. Doesn't matter, since copy-up can't create a
+ * non-empty directory from an empty one.
+ */
+ if (ovl_dentry_upper(dentry))
+ ret = ovl_clear_empty(dentry, &list);
+ }
ovl_cache_free(&list);
@@ -487,8 +495,7 @@ out:
return err;
}
-static int ovl_remove_and_whiteout(struct dentry *dentry,
- enum ovl_path_type type, bool is_dir)
+static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
struct dentry *workdir = ovl_workdir(dentry);
struct inode *wdir = workdir->d_inode;
@@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
int err;
if (is_dir) {
- opaquedir = ovl_check_empty_and_clear(dentry, type);
+ opaquedir = ovl_check_empty_and_clear(dentry);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir))
goto out;
@@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
if (IS_ERR(whiteout))
goto out_unlock;
- if (type == OVL_PATH_LOWER) {
+ upper = ovl_dentry_upper(dentry);
+ if (!upper) {
upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
+ dentry->d_name.len);
err = PTR_ERR(upper);
if (IS_ERR(upper))
goto kill_whiteout;
@@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
} else {
int flags = 0;
- upper = ovl_dentry_upper(dentry);
if (opaquedir)
upper = opaquedir;
err = -ESTALE;
@@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
cap_raise(override_cred->cap_effective, CAP_CHOWN);
old_cred = override_creds(override_cred);
- err = ovl_remove_and_whiteout(dentry, type, is_dir);
+ err = ovl_remove_and_whiteout(dentry, is_dir);
revert_creds(old_cred);
put_cred(override_cred);
@@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
}
if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
- opaquedir = ovl_check_empty_and_clear(new, new_type);
+ opaquedir = ovl_check_empty_and_clear(new);
err = PTR_ERR(opaquedir);
if (IS_ERR(opaquedir)) {
opaquedir = NULL;
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index af2d18c9fcee..07d74b24913b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -235,26 +235,36 @@ out:
return err;
}
+static bool ovl_need_xattr_filter(struct dentry *dentry,
+ enum ovl_path_type type)
+{
+ return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode);
+}
+
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
void *value, size_t size)
{
- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
- ovl_is_private_xattr(name))
+ struct path realpath;
+ enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+
+ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
return -ENODATA;
- return vfs_getxattr(ovl_dentry_real(dentry), name, value, size);
+ return vfs_getxattr(realpath.dentry, name, value, size);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
+ struct path realpath;
+ enum ovl_path_type type = ovl_path_real(dentry, &realpath);
ssize_t res;
int off;
- res = vfs_listxattr(ovl_dentry_real(dentry), list, size);
+ res = vfs_listxattr(realpath.dentry, list, size);
if (res <= 0 || size == 0)
return res;
- if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE)
+ if (!ovl_need_xattr_filter(dentry, type))
return res;
/* filter out private xattrs */
@@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name)
{
int err;
struct path realpath;
- enum ovl_path_type type;
+ enum ovl_path_type type = ovl_path_real(dentry, &realpath);
err = ovl_want_write(dentry);
if (err)
goto out;
- if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE &&
- ovl_is_private_xattr(name))
+ err = -ENODATA;
+ if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
goto out_drop_write;
- type = ovl_path_real(dentry, &realpath);
if (type == OVL_PATH_LOWER) {
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
if (err < 0)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 4e9d7c1fea52..ab1e3dcbed95 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
{
struct ovl_dir_cache *cache = od->cache;
- list_del(&od->cursor.l_node);
+ list_del_init(&od->cursor.l_node);
WARN_ON(cache->refcount <= 0);
cache->refcount--;
if (!cache->refcount) {
@@ -274,11 +274,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir,
return 0;
}
-static inline int ovl_dir_read_merged(struct path *upperpath,
- struct path *lowerpath,
- struct list_head *list)
+static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
{
int err;
+ struct path lowerpath;
+ struct path upperpath;
struct ovl_readdir_data rdd = {
.ctx.actor = ovl_fill_merge,
.list = list,
@@ -286,25 +286,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath,
.is_merge = false,
};
- if (upperpath->dentry) {
- err = ovl_dir_read(upperpath, &rdd);
+ ovl_path_lower(dentry, &lowerpath);
+ ovl_path_upper(dentry, &upperpath);
+
+ if (upperpath.dentry) {
+ err = ovl_dir_read(&upperpath, &rdd);
if (err)
goto out;
- if (lowerpath->dentry) {
- err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd);
+ if (lowerpath.dentry) {
+ err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd);
if (err)
goto out;
}
}
- if (lowerpath->dentry) {
+ if (lowerpath.dentry) {
/*
* Insert lowerpath entries before upperpath ones, this allows
* offsets to be reasonably constant
*/
list_add(&rdd.middle, rdd.list);
rdd.is_merge = true;
- err = ovl_dir_read(lowerpath, &rdd);
+ err = ovl_dir_read(&lowerpath, &rdd);
list_del(&rdd.middle);
}
out:
@@ -329,8 +332,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
{
int res;
- struct path lowerpath;
- struct path upperpath;
struct ovl_dir_cache *cache;
cache = ovl_dir_cache(dentry);
@@ -347,10 +348,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
cache->refcount = 1;
INIT_LIST_HEAD(&cache->entries);
- ovl_path_lower(dentry, &lowerpath);
- ovl_path_upper(dentry, &upperpath);
-
- res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries);
+ res = ovl_dir_read_merged(dentry, &cache->entries);
if (res) {
ovl_cache_free(&cache->entries);
kfree(cache);
@@ -452,10 +450,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
/*
* Need to check if we started out being a lower dir, but got copied up
*/
- if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) {
+ if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) {
struct inode *inode = file_inode(file);
- realfile =lockless_dereference(od->upperfile);
+ realfile = lockless_dereference(od->upperfile);
if (!realfile) {
struct path upperpath;
@@ -538,14 +536,9 @@ const struct file_operations ovl_dir_operations = {
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
{
int err;
- struct path lowerpath;
- struct path upperpath;
struct ovl_cache_entry *p;
- ovl_path_upper(dentry, &upperpath);
- ovl_path_lower(dentry, &lowerpath);
-
- err = ovl_dir_read_merged(&upperpath, &lowerpath, list);
+ err = ovl_dir_read_merged(dentry, list);
if (err)
return err;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 08b704cebfc4..f16d318b71f8 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");
-#define OVERLAYFS_SUPER_MAGIC 0x794c764f
+#define OVERLAYFS_SUPER_MAGIC 0x794c7630
struct ovl_config {
char *lowerdir;
@@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
{
- struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry);
- /*
- * Make sure to order reads to upperdentry wrt ovl_dentry_update()
- */
- smp_read_barrier_depends();
- return upperdentry;
+ return lockless_dereference(oe->__upperdentry);
}
void ovl_path_upper(struct dentry *dentry, struct path *path)
@@ -462,11 +457,34 @@ static const match_table_t ovl_tokens = {
{OPT_ERR, NULL}
};
+static char *ovl_next_opt(char **s)
+{
+ char *sbegin = *s;
+ char *p;
+
+ if (sbegin == NULL)
+ return NULL;
+
+ for (p = sbegin; *p; p++) {
+ if (*p == '\\') {
+ p++;
+ if (!*p)
+ break;
+ } else if (*p == ',') {
+ *p = '\0';
+ *s = p + 1;
+ return sbegin;
+ }
+ }
+ *s = NULL;
+ return sbegin;
+}
+
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
- while ((p = strsep(&opt, ",")) != NULL) {
+ while ((p = ovl_next_opt(&opt)) != NULL) {
int token;
substring_t args[MAX_OPT_ARGS];
@@ -554,15 +572,34 @@ out_dput:
goto out_unlock;
}
+static void ovl_unescape(char *s)
+{
+ char *d = s;
+
+ for (;; s++, d++) {
+ if (*s == '\\')
+ s++;
+ *d = *s;
+ if (!*s)
+ break;
+ }
+}
+
static int ovl_mount_dir(const char *name, struct path *path)
{
int err;
+ char *tmp = kstrdup(name, GFP_KERNEL);
+
+ if (!tmp)
+ return -ENOMEM;
- err = kern_path(name, LOOKUP_FOLLOW, path);
+ ovl_unescape(tmp);
+ err = kern_path(tmp, LOOKUP_FOLLOW, path);
if (err) {
- pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
+ pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err);
err = -EINVAL;
}
+ kfree(tmp);
return err;
}
@@ -776,11 +813,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
static struct file_system_type ovl_fs_type = {
.owner = THIS_MODULE,
- .name = "overlayfs",
+ .name = "overlay",
.mount = ovl_mount,
.kill_sb = kill_anon_super,
};
-MODULE_ALIAS_FS("overlayfs");
+MODULE_ALIAS_FS("overlay");
static int __init ovl_init(void)
{
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0388cffe3d..f6734c6b66a6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -552,6 +552,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_GROWSDOWN)] = "gd",
[ilog2(VM_PFNMAP)] = "pf",
[ilog2(VM_DENYWRITE)] = "dw",
+#ifdef CONFIG_X86_INTEL_MPX
+ [ilog2(VM_MPX)] = "mp",
+#endif
[ilog2(VM_LOCKED)] = "lo",
[ilog2(VM_IO)] = "io",
[ilog2(VM_SEQ_READ)] = "sr",
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index fafb7a02a5d6..50416602774d 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -36,6 +36,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
+#include <linux/syslog.h>
#include "internal.h"
@@ -120,6 +121,18 @@ static const struct seq_operations pstore_ftrace_seq_ops = {
.show = pstore_ftrace_seq_show,
};
+static int pstore_check_syslog_permissions(struct pstore_private *ps)
+{
+ switch (ps->type) {
+ case PSTORE_TYPE_DMESG:
+ case PSTORE_TYPE_CONSOLE:
+ return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
+ SYSLOG_FROM_READER);
+ default:
+ return 0;
+ }
+}
+
static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -138,6 +151,10 @@ static int pstore_file_open(struct inode *inode, struct file *file)
int err;
const struct seq_operations *sops = NULL;
+ err = pstore_check_syslog_permissions(ps);
+ if (err)
+ return err;
+
if (ps->type == PSTORE_TYPE_FTRACE)
sops = &pstore_ftrace_seq_ops;
@@ -174,6 +191,11 @@ static const struct file_operations pstore_file_operations = {
static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = dentry->d_inode->i_private;
+ int err;
+
+ err = pstore_check_syslog_permissions(p);
+ if (err)
+ return err;
if (p->psi->erase)
p->psi->erase(p->type, p->id, p->count,
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 3b5744306ed8..ec881b312700 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -135,25 +135,27 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
return prz;
}
-static void ramoops_read_kmsg_hdr(char *buffer, struct timespec *time,
+static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time,
bool *compressed)
{
char data_type;
+ int header_length = 0;
- if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n",
- &time->tv_sec, &time->tv_nsec, &data_type) == 3) {
+ if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n%n", &time->tv_sec,
+ &time->tv_nsec, &data_type, &header_length) == 3) {
if (data_type == 'C')
*compressed = true;
else
*compressed = false;
- } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
- &time->tv_sec, &time->tv_nsec) == 2) {
+ } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n%n",
+ &time->tv_sec, &time->tv_nsec, &header_length) == 2) {
*compressed = false;
} else {
time->tv_sec = 0;
time->tv_nsec = 0;
*compressed = false;
}
+ return header_length;
}
static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
@@ -165,6 +167,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
ssize_t ecc_notice_size;
struct ramoops_context *cxt = psi->data;
struct persistent_ram_zone *prz;
+ int header_length;
prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt,
cxt->max_dump_cnt, id, type,
@@ -178,7 +181,13 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
if (!prz)
return 0;
+ if (!persistent_ram_old(prz))
+ return 0;
+
size = persistent_ram_old_size(prz);
+ header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), time,
+ compressed);
+ size -= header_length;
/* ECC correction notice */
ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
@@ -187,8 +196,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
if (*buf == NULL)
return -ENOMEM;
- memcpy(*buf, persistent_ram_old(prz), size);
- ramoops_read_kmsg_hdr(*buf, time, compressed);
+ memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size);
persistent_ram_ecc_string(prz, *buf + size, ecc_notice_size + 1);
return size + ecc_notice_size;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 92e8f99a5857..281002689d64 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1338,7 +1338,10 @@ xfs_free_file_space(
goto out;
}
-
+/*
+ * Preallocate and zero a range of a file. This mechanism has the allocation
+ * semantics of fallocate and in addition converts data in the range to zeroes.
+ */
int
xfs_zero_file_space(
struct xfs_inode *ip,
@@ -1346,65 +1349,30 @@ xfs_zero_file_space(
xfs_off_t len)
{
struct xfs_mount *mp = ip->i_mount;
- uint granularity;
- xfs_off_t start_boundary;
- xfs_off_t end_boundary;
+ uint blksize;
int error;
trace_xfs_zero_file_space(ip);
- granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+ blksize = 1 << mp->m_sb.sb_blocklog;
/*
- * Round the range of extents we are going to convert inwards. If the
- * offset is aligned, then it doesn't get changed so we zero from the
- * start of the block offset points to.
+ * Punch a hole and prealloc the range. We use hole punch rather than
+ * unwritten extent conversion for two reasons:
+ *
+ * 1.) Hole punch handles partial block zeroing for us.
+ *
+ * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
+ * by virtue of the hole punch.
*/
- start_boundary = round_up(offset, granularity);
- end_boundary = round_down(offset + len, granularity);
-
- ASSERT(start_boundary >= offset);
- ASSERT(end_boundary <= offset + len);
-
- if (start_boundary < end_boundary - 1) {
- /*
- * Writeback the range to ensure any inode size updates due to
- * appending writes make it to disk (otherwise we could just
- * punch out the delalloc blocks).
- */
- error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
- start_boundary, end_boundary - 1);
- if (error)
- goto out;
- truncate_pagecache_range(VFS_I(ip), start_boundary,
- end_boundary - 1);
-
- /* convert the blocks */
- error = xfs_alloc_file_space(ip, start_boundary,
- end_boundary - start_boundary - 1,
- XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
- if (error)
- goto out;
-
- /* We've handled the interior of the range, now for the edges */
- if (start_boundary != offset) {
- error = xfs_iozero(ip, offset, start_boundary - offset);
- if (error)
- goto out;
- }
-
- if (end_boundary != offset + len)
- error = xfs_iozero(ip, end_boundary,
- offset + len - end_boundary);
-
- } else {
- /*
- * It's either a sub-granularity range or the range spanned lies
- * partially across two adjacent blocks.
- */
- error = xfs_iozero(ip, offset, len);
- }
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ goto out;
+ error = xfs_alloc_file_space(ip, round_down(offset, blksize),
+ round_up(offset + len, blksize) -
+ round_down(offset, blksize),
+ XFS_BMAPI_PREALLOC);
out:
return error;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f1deb961a296..894924a5129b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk(
XFS_WANT_CORRUPTED_RETURN(stat == 1);
/* Check if the record contains the inode in request */
- if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
- return -EINVAL;
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
+ *icount = 0;
+ return 0;
+ }
idx = agino - irec->ir_startino + 1;
if (idx < XFS_INODES_PER_CHUNK &&
@@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk(
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
+struct xfs_bulkstat_agichunk {
+ char __user **ac_ubuffer;/* pointer into user's buffer */
+ int ac_ubleft; /* bytes left in user's buffer */
+ int ac_ubelem; /* spaces used in user's buffer */
+};
+
/*
* Process inodes in chunk with a pointer to a formatter function
* that will iget the inode and fill in the appropriate structure.
*/
-int
+static int
xfs_bulkstat_ag_ichunk(
struct xfs_mount *mp,
xfs_agnumber_t agno,
struct xfs_inobt_rec_incore *irbp,
bulkstat_one_pf formatter,
size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp)
+ struct xfs_bulkstat_agichunk *acp,
+ xfs_agino_t *last_agino)
{
- xfs_ino_t lastino = acp->ac_lastino;
char __user **ubufp = acp->ac_ubuffer;
- int ubleft = acp->ac_ubleft;
- int ubelem = acp->ac_ubelem;
- int chunkidx, clustidx;
+ int chunkidx;
int error = 0;
- xfs_agino_t agino;
+ xfs_agino_t agino = irbp->ir_startino;
- for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
- XFS_BULKSTAT_UBLEFT(ubleft) &&
- irbp->ir_freecount < XFS_INODES_PER_CHUNK;
- chunkidx++, clustidx++, agino++) {
- int fmterror; /* bulkstat formatter result */
+ for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
+ chunkidx++, agino++) {
+ int fmterror;
int ubused;
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
- ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+ /* inode won't fit in buffer, we are done */
+ if (acp->ac_ubleft < statstruct_size)
+ break;
/* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- lastino = ino;
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
continue;
- }
-
- /*
- * Count used inodes as free so we can tell when the
- * chunk is used up.
- */
- irbp->ir_freecount++;
/* Get the inode and fill in a single buffer */
ubused = statstruct_size;
- error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
- if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error && error != -ENOENT && error != -EINVAL) {
- ubleft = 0;
- break;
- }
- lastino = ino;
- continue;
- }
- if (fmterror == BULKSTAT_RV_GIVEUP) {
- ubleft = 0;
+ error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+ *ubufp, acp->ac_ubleft, &ubused, &fmterror);
+
+ if (fmterror == BULKSTAT_RV_GIVEUP ||
+ (error && error != -ENOENT && error != -EINVAL)) {
+ acp->ac_ubleft = 0;
ASSERT(error);
break;
}
- if (*ubufp)
- *ubufp += ubused;
- ubleft -= ubused;
- ubelem++;
- lastino = ino;
+
+ /* be careful not to leak error if at end of chunk */
+ if (fmterror == BULKSTAT_RV_NOTHING || error) {
+ error = 0;
+ continue;
+ }
+
+ *ubufp += ubused;
+ acp->ac_ubleft -= ubused;
+ acp->ac_ubelem++;
}
- acp->ac_lastino = lastino;
- acp->ac_ubleft = ubleft;
- acp->ac_ubelem = ubelem;
+ /*
+ * Post-update *last_agino. At this point, agino will always point one
+ * inode past the last inode we processed successfully. Hence we
+ * substract that inode when setting the *last_agino cursor so that we
+ * return the correct cookie to userspace. On the next bulkstat call,
+ * the inode under the lastino cookie will be skipped as we have already
+ * processed it here.
+ */
+ *last_agino = agino - 1;
return error;
}
@@ -353,45 +356,33 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- int end_of_ag; /* set if we've seen the ag end */
- int error; /* error code */
- int fmterror;/* bulkstat formatter result */
- int i; /* loop index */
- int icount; /* count of inodes good in irbuf */
size_t irbsize; /* size of irec buffer in bytes */
- xfs_ino_t ino; /* inode number (filesystem) */
- xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
- xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
- int rval; /* return value error code */
- int tmp; /* result value from btree calls */
int ubcount; /* size of user's buffer */
- int ubleft; /* bytes left in user's buffer */
- char __user *ubufp; /* pointer into user's buffer */
- int ubelem; /* spaces used in user's buffer */
+ struct xfs_bulkstat_agichunk ac;
+ int error = 0;
/*
* Get the last inode value, see if there's nothing to do.
*/
- ino = (xfs_ino_t)*lastinop;
- lastino = ino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
+ agno = XFS_INO_TO_AGNO(mp, *lastinop);
+ agino = XFS_INO_TO_AGINO(mp, *lastinop);
if (agno >= mp->m_sb.sb_agcount ||
- ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
*done = 1;
*ubcountp = 0;
return 0;
}
ubcount = *ubcountp; /* statstruct's */
- ubleft = ubcount * statstruct_size; /* bytes */
- *ubcountp = ubelem = 0;
+ ac.ac_ubuffer = &ubuffer;
+ ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
+ ac.ac_ubelem = 0;
+
+ *ubcountp = 0;
*done = 0;
- fmterror = 0;
- ubufp = ubuffer;
+
irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
if (!irbuf)
return -ENOMEM;
@@ -402,9 +393,13 @@ xfs_bulkstat(
* Loop over the allocation groups, starting from the last
* inode returned; 0 means start of the allocation group.
*/
- rval = 0;
- while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
- cond_resched();
+ while (agno < mp->m_sb.sb_agcount) {
+ struct xfs_inobt_rec_incore *irbp = irbuf;
+ struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
+ bool end_of_ag = false;
+ int icount = 0;
+ int stat;
+
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
if (error)
break;
@@ -414,10 +409,6 @@ xfs_bulkstat(
*/
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
XFS_BTNUM_INO);
- irbp = irbuf;
- irbufend = irbuf + nirbuf;
- end_of_ag = 0;
- icount = 0;
if (agino > 0) {
/*
* In the middle of an allocation group, we need to get
@@ -427,22 +418,23 @@ xfs_bulkstat(
error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
if (error)
- break;
+ goto del_cursor;
if (icount) {
irbp->ir_startino = r.ir_startino;
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
}
/* Increment to the next record */
- error = xfs_btree_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &stat);
} else {
/* Start of ag. Lookup the first inode chunk */
- error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
+ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
+ }
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
}
- if (error)
- break;
/*
* Loop through inode btree records in this ag,
@@ -451,10 +443,10 @@ xfs_bulkstat(
while (irbp < irbufend && icount < ubcount) {
struct xfs_inobt_rec_incore r;
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
- end_of_ag = 1;
- break;
+ error = xfs_inobt_get_rec(cur, &r, &stat);
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
}
/*
@@ -469,77 +461,79 @@ xfs_bulkstat(
irbp++;
icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
}
- /*
- * Set agino to after this chunk and bump the cursor.
- */
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- error = xfs_btree_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
+ }
cond_resched();
}
+
/*
- * Drop the btree buffers and the agi buffer.
- * We can't hold any of the locks these represent
- * when calling iget.
+ * Drop the btree buffers and the agi buffer as we can't hold any
+ * of the locks these represent when calling iget. If there is a
+ * pending error, then we are done.
*/
+del_cursor:
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
xfs_buf_relse(agbp);
+ if (error)
+ break;
/*
- * Now format all the good inodes into the user's buffer.
+ * Now format all the good inodes into the user's buffer. The
+ * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
+ * for the next loop iteration.
*/
irbufend = irbp;
for (irbp = irbuf;
- irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
- struct xfs_bulkstat_agichunk ac;
-
- ac.ac_lastino = lastino;
- ac.ac_ubuffer = &ubuffer;
- ac.ac_ubleft = ubleft;
- ac.ac_ubelem = ubelem;
+ irbp < irbufend && ac.ac_ubleft >= statstruct_size;
+ irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
- formatter, statstruct_size, &ac);
+ formatter, statstruct_size, &ac,
+ &agino);
if (error)
- rval = error;
-
- lastino = ac.ac_lastino;
- ubleft = ac.ac_ubleft;
- ubelem = ac.ac_ubelem;
+ break;
cond_resched();
}
+
/*
- * Set up for the next loop iteration.
+ * If we've run out of space or had a formatting error, we
+ * are now done
*/
- if (XFS_BULKSTAT_UBLEFT(ubleft)) {
- if (end_of_ag) {
- agno++;
- agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
- } else
+ if (ac.ac_ubleft < statstruct_size || error)
break;
+
+ if (end_of_ag) {
+ agno++;
+ agino = 0;
+ }
}
/*
* Done, we're either out of filesystem or space to put the data.
*/
kmem_free(irbuf);
- *ubcountp = ubelem;
+ *ubcountp = ac.ac_ubelem;
+
/*
- * Found some inodes, return them now and return the error next time.
+ * We found some inodes, so clear the error status and return them.
+ * The lastino pointer will point directly at the inode that triggered
+ * any error that occurred, so on the next call the error will be
+ * triggered again and propagated to userspace as there will be no
+ * formatted inodes in the buffer.
*/
- if (ubelem)
- rval = 0;
- if (agno >= mp->m_sb.sb_agcount) {
- /*
- * If we ran out of filesystem, mark lastino as off
- * the end of the filesystem, so the next call
- * will return immediately.
- */
- *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
+ if (ac.ac_ubelem)
+ error = 0;
+
+ /*
+ * If we ran out of filesystem, lastino will point off the end of
+ * the filesystem so the next call will return immediately.
+ */
+ *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (agno >= mp->m_sb.sb_agcount)
*done = 1;
- } else
- *lastinop = (xfs_ino_t)lastino;
- return rval;
+ return error;
}
int
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index aaed08022eb9..6ea8b3912fa4 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
int *ubused,
int *stat);
-struct xfs_bulkstat_agichunk {
- xfs_ino_t ac_lastino; /* last inode returned */
- char __user **ac_ubuffer;/* pointer into user's buffer */
- int ac_ubleft; /* bytes left in user's buffer */
- int ac_ubelem; /* spaces used in user's buffer */
-};
-
-int
-xfs_bulkstat_ag_ichunk(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irbp,
- bulkstat_one_pf formatter,
- size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp);
-
/*
* Values for stat return value.
*/