aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/addr_list.c2
-rw-r--r--fs/afs/afs.h16
-rw-r--r--fs/afs/callback.c29
-rw-r--r--fs/afs/cell.c187
-rw-r--r--fs/afs/cmservice.c14
-rw-r--r--fs/afs/dir.c375
-rw-r--r--fs/afs/dir_silly.c35
-rw-r--r--fs/afs/dynroot.c5
-rw-r--r--fs/afs/file.c29
-rw-r--r--fs/afs/flock.c49
-rw-r--r--fs/afs/fs_probe.c4
-rw-r--r--fs/afs/fsclient.c702
-rw-r--r--fs/afs/inode.c453
-rw-r--r--fs/afs/internal.h199
-rw-r--r--fs/afs/proc.c8
-rw-r--r--fs/afs/rotate.c47
-rw-r--r--fs/afs/rxrpc.c20
-rw-r--r--fs/afs/security.c19
-rw-r--r--fs/afs/server.c17
-rw-r--r--fs/afs/super.c22
-rw-r--r--fs/afs/vl_list.c20
-rw-r--r--fs/afs/vl_probe.c4
-rw-r--r--fs/afs/vl_rotate.c28
-rw-r--r--fs/afs/vlclient.c38
-rw-r--r--fs/afs/write.c100
-rw-r--r--fs/afs/xattr.c202
-rw-r--r--fs/afs/yfsclient.c714
-rw-r--r--fs/binfmt_elf.c180
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/cachefiles/namei.c1
-rw-r--r--fs/ceph/caps.c93
-rw-r--r--fs/ceph/debugfs.c40
-rw-r--r--fs/ceph/export.c356
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c85
-rw-r--r--fs/ceph/locks.c13
-rw-r--r--fs/ceph/mds_client.c205
-rw-r--r--fs/ceph/mds_client.h33
-rw-r--r--fs/ceph/mdsmap.c2
-rw-r--r--fs/ceph/quota.c177
-rw-r--r--fs/ceph/super.c7
-rw-r--r--fs/ceph/super.h2
-rw-r--r--fs/cifs/dns_resolve.c2
-rw-r--r--fs/coda/psdev.c1
-rw-r--r--fs/configfs/dir.c17
-rw-r--r--fs/dax.c8
-rw-r--r--fs/eventfd.c8
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/f2fs/acl.c4
-rw-r--r--fs/f2fs/checkpoint.c108
-rw-r--r--fs/f2fs/data.c285
-rw-r--r--fs/f2fs/f2fs.h127
-rw-r--r--fs/f2fs/file.c76
-rw-r--r--fs/f2fs/gc.c16
-rw-r--r--fs/f2fs/inline.c17
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c2
-rw-r--r--fs/f2fs/node.c43
-rw-r--r--fs/f2fs/recovery.c37
-rw-r--r--fs/f2fs/segment.c71
-rw-r--r--fs/f2fs/segment.h16
-rw-r--r--fs/f2fs/super.c70
-rw-r--r--fs/f2fs/xattr.c36
-rw-r--r--fs/f2fs/xattr.h2
-rw-r--r--fs/fat/file.c11
-rw-r--r--fs/fsopen.c2
-rw-r--r--fs/fuse/control.c20
-rw-r--r--fs/fuse/cuse.c13
-rw-r--r--fs/fuse/dev.c16
-rw-r--r--fs/fuse/file.c22
-rw-r--r--fs/fuse/fuse_i.h7
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/gfs2/sys.c8
-rw-r--r--fs/hostfs/hostfs.h2
-rw-r--r--fs/hugetlbfs/inode.c18
-rw-r--r--fs/io_uring.c93
-rw-r--r--fs/lockd/clntlock.c4
-rw-r--r--fs/lockd/svc.c33
-rw-r--r--fs/locks.c12
-rw-r--r--fs/nfs/callback.c9
-rw-r--r--fs/nfs/callback_xdr.c2
-rw-r--r--fs/nfs/client.c1
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfsd/export.c18
-rw-r--r--fs/nfsd/netns.h11
-rw-r--r--fs/nfsd/nfs3xdr.c21
-rw-r--r--fs/nfsd/nfs4callback.c9
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4proc.c3
-rw-r--r--fs/nfsd/nfs4recover.c436
-rw-r--r--fs/nfsd/nfs4state.c68
-rw-r--r--fs/nfsd/nfs4xdr.c9
-rw-r--r--fs/nfsd/nfsctl.c42
-rw-r--r--fs/nfsd/nfsd.h17
-rw-r--r--fs/nfsd/nfssvc.c271
-rw-r--r--fs/nfsd/nfsxdr.c17
-rw-r--r--fs/nfsd/state.h8
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nfsd/vfs.h5
-rw-r--r--fs/notify/fsnotify.c41
-rw-r--r--fs/notify/mark.c5
-rw-r--r--fs/ocfs2/dir.c20
-rw-r--r--fs/ocfs2/export.c30
-rw-r--r--fs/ocfs2/ocfs2_fs.h28
-rw-r--r--fs/orangefs/orangefs-bufmap.c2
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/file.c133
-rw-r--r--fs/overlayfs/inode.c3
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c37
-rw-r--r--fs/quota/quota_v1.c2
-rw-r--r--fs/quota/quota_v2.c2
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/xattr.c9
-rw-r--r--fs/sync.c21
-rw-r--r--fs/ubifs/auth.c33
-rw-r--r--fs/ubifs/debug.c1
-rw-r--r--fs/ubifs/dir.c29
-rw-r--r--fs/ubifs/file.c16
-rw-r--r--fs/ubifs/find.c9
-rw-r--r--fs/ubifs/ioctl.c11
-rw-r--r--fs/ubifs/journal.c72
-rw-r--r--fs/ubifs/misc.h8
-rw-r--r--fs/ubifs/orphan.c208
-rw-r--r--fs/ubifs/sb.c7
-rw-r--r--fs/ubifs/super.c22
-rw-r--r--fs/ubifs/tnc.c15
-rw-r--r--fs/ubifs/ubifs.h6
-rw-r--r--fs/ubifs/xattr.c71
-rw-r--r--fs/udf/namei.c15
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/userfaultfd.c5
137 files changed, 4753 insertions, 2882 deletions
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 967db336d11a..9eaff55df7b4 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -251,7 +251,7 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
_enter("%s", cell->name);
ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
- &result, _expiry);
+ &result, _expiry, true);
if (ret < 0) {
_leave(" = %d [dns]", ret);
return ERR_PTR(ret);
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index d12ffb457e47..3f4e460c6655 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -23,6 +23,9 @@
#define AFSPATHMAX 1024 /* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX 1024 /* Maximum length of an opaque field */
+#define AFS_VL_MAX_LIFESPAN (120 * HZ)
+#define AFS_PROBE_MAX_LIFESPAN (30 * HZ)
+
typedef u64 afs_volid_t;
typedef u64 afs_vnodeid_t;
typedef u64 afs_dataversion_t;
@@ -69,8 +72,8 @@ typedef enum {
struct afs_callback {
time64_t expires_at; /* Time at which expires */
- unsigned version; /* Callback version */
- afs_callback_type_t type; /* Type of callback */
+ //unsigned version; /* Callback version */
+ //afs_callback_type_t type; /* Type of callback */
};
struct afs_callback_break {
@@ -144,6 +147,15 @@ struct afs_file_status {
u32 abort_code; /* Abort if bulk-fetching this failed */
};
+struct afs_status_cb {
+ struct afs_file_status status;
+ struct afs_callback callback;
+ unsigned int cb_break; /* Pre-op callback break counter */
+ bool have_status; /* True if status record was retrieved */
+ bool have_cb; /* True if cb record was retrieved */
+ bool have_error; /* True if status.abort_code indicates an error */
+};
+
/*
* AFS file status change request
*/
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 128f2dbe256a..d441bef72163 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -94,15 +94,15 @@ int afs_register_server_cb_interest(struct afs_vnode *vnode,
struct afs_server *server = entry->server;
again:
- if (vnode->cb_interest &&
- likely(vnode->cb_interest == entry->cb_interest))
+ vcbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
+ if (vcbi && likely(vcbi == entry->cb_interest))
return 0;
read_lock(&slist->lock);
cbi = afs_get_cb_interest(entry->cb_interest);
read_unlock(&slist->lock);
- vcbi = vnode->cb_interest;
if (vcbi) {
if (vcbi == cbi) {
afs_put_cb_interest(afs_v2net(vnode), cbi);
@@ -114,8 +114,9 @@ again:
*/
if (cbi && vcbi->server == cbi->server) {
write_seqlock(&vnode->cb_lock);
- old = vnode->cb_interest;
- vnode->cb_interest = cbi;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ rcu_assign_pointer(vnode->cb_interest, cbi);
write_sequnlock(&vnode->cb_lock);
afs_put_cb_interest(afs_v2net(vnode), old);
return 0;
@@ -160,8 +161,9 @@ again:
*/
write_seqlock(&vnode->cb_lock);
- old = vnode->cb_interest;
- vnode->cb_interest = cbi;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ rcu_assign_pointer(vnode->cb_interest, cbi);
vnode->cb_s_break = cbi->server->cb_s_break;
vnode->cb_v_break = vnode->volume->cb_v_break;
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
@@ -191,10 +193,11 @@ void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
vi = NULL;
write_unlock(&cbi->server->cb_break_lock);
- kfree(vi);
+ if (vi)
+ kfree_rcu(vi, rcu);
afs_put_server(net, cbi->server);
}
- kfree(cbi);
+ kfree_rcu(cbi, rcu);
}
}
@@ -218,14 +221,8 @@ void __afs_break_callback(struct afs_vnode *vnode)
vnode->cb_break++;
afs_clear_permits(vnode);
- spin_lock(&vnode->lock);
-
- _debug("break callback");
-
- if (list_empty(&vnode->granted_locks) &&
- !list_empty(&vnode->pending_locks))
+ if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_lock_may_be_available(vnode);
- spin_unlock(&vnode->lock);
}
}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 9de46116c749..9c3b07ba2222 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -123,6 +123,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
const char *name, unsigned int namelen,
const char *addresses)
{
+ struct afs_vlserver_list *vllist;
struct afs_cell *cell;
int i, ret;
@@ -151,18 +152,14 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
atomic_set(&cell->usage, 2);
INIT_WORK(&cell->manager, afs_manage_cell);
- cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
- (1 << AFS_CELL_FL_NO_LOOKUP_YET));
INIT_LIST_HEAD(&cell->proc_volumes);
rwlock_init(&cell->proc_lock);
rwlock_init(&cell->vl_servers_lock);
- /* Fill in the VL server list if we were given a list of addresses to
- * use.
+ /* Provide a VL server list, filling it in if we were given a list of
+ * addresses to use.
*/
if (addresses) {
- struct afs_vlserver_list *vllist;
-
vllist = afs_parse_text_addrs(net,
addresses, strlen(addresses), ':',
VL_SERVICE, AFS_VL_PORT);
@@ -171,19 +168,32 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
goto parse_failed;
}
- rcu_assign_pointer(cell->vl_servers, vllist);
+ vllist->source = DNS_RECORD_FROM_CONFIG;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
cell->dns_expiry = TIME64_MAX;
- __clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags);
} else {
+ ret = -ENOMEM;
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist)
+ goto error;
+ vllist->source = DNS_RECORD_UNAVAILABLE;
+ vllist->status = DNS_LOOKUP_NOT_DONE;
cell->dns_expiry = ktime_get_real_seconds();
}
+ rcu_assign_pointer(cell->vl_servers, vllist);
+
+ cell->dns_source = vllist->source;
+ cell->dns_status = vllist->status;
+ smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */
+
_leave(" = %p", cell);
return cell;
parse_failed:
if (ret == -EINVAL)
printk(KERN_ERR "kAFS: bad VL server IP address\n");
+error:
kfree(cell);
_leave(" = %d", ret);
return ERR_PTR(ret);
@@ -208,6 +218,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
{
struct afs_cell *cell, *candidate, *cursor;
struct rb_node *parent, **pp;
+ enum afs_cell_state state;
int ret, n;
_enter("%s,%s", name, vllist);
@@ -267,18 +278,16 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
wait_for_cell:
_debug("wait_for_cell");
- ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
- smp_rmb();
-
- switch (READ_ONCE(cell->state)) {
- case AFS_CELL_FAILED:
+ wait_var_event(&cell->state,
+ ({
+ state = smp_load_acquire(&cell->state); /* vs error */
+ state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED;
+ }));
+
+ /* Check the state obtained from the wait check. */
+ if (state == AFS_CELL_FAILED) {
ret = cell->error;
goto error;
- default:
- _debug("weird %u %d", cell->state, cell->error);
- goto error;
- case AFS_CELL_ACTIVE:
- break;
}
_leave(" = %p [cell]", cell);
@@ -360,16 +369,46 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/*
* Update a cell's VL server address list from the DNS.
*/
-static void afs_update_cell(struct afs_cell *cell)
+static int afs_update_cell(struct afs_cell *cell)
{
- struct afs_vlserver_list *vllist, *old;
+ struct afs_vlserver_list *vllist, *old = NULL, *p;
unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
time64_t now, expiry = 0;
+ int ret = 0;
_enter("%s", cell->name);
vllist = afs_dns_query(cell, &expiry);
+ if (IS_ERR(vllist)) {
+ ret = PTR_ERR(vllist);
+
+ _debug("%s: fail %d", cell->name, ret);
+ if (ret == -ENOMEM)
+ goto out_wake;
+
+ ret = -ENOMEM;
+ vllist = afs_alloc_vlserver_list(0);
+ if (!vllist)
+ goto out_wake;
+
+ switch (ret) {
+ case -ENODATA:
+ case -EDESTADDRREQ:
+ vllist->status = DNS_LOOKUP_GOT_NOT_FOUND;
+ break;
+ case -EAGAIN:
+ case -ECONNREFUSED:
+ vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE;
+ break;
+ default:
+ vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE;
+ break;
+ }
+ }
+
+ _debug("%s: got list %d %d", cell->name, vllist->source, vllist->status);
+ cell->dns_status = vllist->status;
now = ktime_get_real_seconds();
if (min_ttl > max_ttl)
@@ -379,48 +418,47 @@ static void afs_update_cell(struct afs_cell *cell)
else if (expiry > now + max_ttl)
expiry = now + max_ttl;
- if (IS_ERR(vllist)) {
- switch (PTR_ERR(vllist)) {
- case -ENODATA:
- case -EDESTADDRREQ:
+ _debug("%s: status %d", cell->name, vllist->status);
+ if (vllist->source == DNS_RECORD_UNAVAILABLE) {
+ switch (vllist->status) {
+ case DNS_LOOKUP_GOT_NOT_FOUND:
/* The DNS said that the cell does not exist or there
* weren't any addresses to be had.
*/
- set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
- clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = expiry;
break;
- case -EAGAIN:
- case -ECONNREFUSED:
+ case DNS_LOOKUP_BAD:
+ case DNS_LOOKUP_GOT_LOCAL_FAILURE:
+ case DNS_LOOKUP_GOT_TEMP_FAILURE:
+ case DNS_LOOKUP_GOT_NS_FAILURE:
default:
- set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
cell->dns_expiry = now + 10;
break;
}
-
- cell->error = -EDESTADDRREQ;
} else {
- clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
- clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
-
- /* Exclusion on changing vl_addrs is achieved by a
- * non-reentrant work item.
- */
- old = rcu_dereference_protected(cell->vl_servers, true);
- rcu_assign_pointer(cell->vl_servers, vllist);
cell->dns_expiry = expiry;
-
- if (old)
- afs_put_vlserverlist(cell->net, old);
}
- if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
- wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);
+ /* Replace the VL server list if the new record has servers or the old
+ * record doesn't.
+ */
+ write_lock(&cell->vl_servers_lock);
+ p = rcu_dereference_protected(cell->vl_servers, true);
+ if (vllist->nr_servers > 0 || p->nr_servers == 0) {
+ rcu_assign_pointer(cell->vl_servers, vllist);
+ cell->dns_source = vllist->source;
+ old = p;
+ }
+ write_unlock(&cell->vl_servers_lock);
+ afs_put_vlserverlist(cell->net, old);
- now = ktime_get_real_seconds();
- afs_set_cell_timer(cell->net, cell->dns_expiry - now);
- _leave("");
+out_wake:
+ smp_store_release(&cell->dns_lookup_count,
+ cell->dns_lookup_count + 1); /* vs source/status */
+ wake_up_var(&cell->dns_lookup_count);
+ _leave(" = %d", ret);
+ return ret;
}
/*
@@ -491,8 +529,7 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
now = ktime_get_real_seconds();
cell->last_inactive = now;
expire_delay = 0;
- if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
- !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ if (cell->vl_servers->nr_servers)
expire_delay = afs_cell_gc_delay;
if (atomic_dec_return(&cell->usage) > 1)
@@ -623,11 +660,13 @@ again:
goto final_destruction;
if (cell->state == AFS_CELL_FAILED)
goto done;
- cell->state = AFS_CELL_UNSET;
+ smp_store_release(&cell->state, AFS_CELL_UNSET);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_UNSET:
- cell->state = AFS_CELL_ACTIVATING;
+ smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_ACTIVATING:
@@ -635,28 +674,29 @@ again:
if (ret < 0)
goto activation_failed;
- cell->state = AFS_CELL_ACTIVE;
- smp_wmb();
- clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_ACTIVE:
if (atomic_read(&cell->usage) > 1) {
- time64_t now = ktime_get_real_seconds();
- if (cell->dns_expiry <= now && net->live)
- afs_update_cell(cell);
+ if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
+ ret = afs_update_cell(cell);
+ if (ret < 0)
+ cell->error = ret;
+ }
goto done;
}
- cell->state = AFS_CELL_DEACTIVATING;
+ smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
+ wake_up_var(&cell->state);
goto again;
case AFS_CELL_DEACTIVATING:
- set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
if (atomic_read(&cell->usage) > 1)
goto reverse_deactivation;
afs_deactivate_cell(net, cell);
- cell->state = AFS_CELL_INACTIVE;
+ smp_store_release(&cell->state, AFS_CELL_INACTIVE);
+ wake_up_var(&cell->state);
goto again;
default:
@@ -669,17 +709,13 @@ activation_failed:
cell->error = ret;
afs_deactivate_cell(net, cell);
- cell->state = AFS_CELL_FAILED;
- smp_wmb();
- if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
+ wake_up_var(&cell->state);
goto again;
reverse_deactivation:
- cell->state = AFS_CELL_ACTIVE;
- smp_wmb();
- clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
- wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
+ smp_store_release(&cell->state, AFS_CELL_ACTIVE);
+ wake_up_var(&cell->state);
_leave(" [deact->act]");
return;
@@ -739,11 +775,16 @@ void afs_manage_cells(struct work_struct *work)
}
if (usage == 1) {
+ struct afs_vlserver_list *vllist;
time64_t expire_at = cell->last_inactive;
- if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
- !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
+ read_lock(&cell->vl_servers_lock);
+ vllist = rcu_dereference_protected(
+ cell->vl_servers,
+ lockdep_is_held(&cell->vl_servers_lock));
+ if (vllist->nr_servers > 0)
expire_at += afs_cell_gc_delay;
+ read_unlock(&cell->vl_servers_lock);
if (purging || expire_at <= now)
sched_cell = true;
else if (expire_at < next_manage)
@@ -751,10 +792,8 @@ void afs_manage_cells(struct work_struct *work)
}
if (!purging) {
- if (cell->dns_expiry <= now)
+ if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
sched_cell = true;
- else if (cell->dns_expiry <= next_manage)
- next_manage = cell->dns_expiry;
}
if (sched_cell)
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 748090014519..01437cfe5432 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -213,7 +213,7 @@ static int afs_find_cm_server_by_peer(struct afs_call *call)
return 0;
}
- call->cm_server = server;
+ call->server = server;
return afs_record_cm_probe(call, server);
}
@@ -234,7 +234,7 @@ static int afs_find_cm_server_by_uuid(struct afs_call *call,
return 0;
}
- call->cm_server = server;
+ call->server = server;
return afs_record_cm_probe(call, server);
}
@@ -260,8 +260,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
* server holds up change visibility till it receives our reply so as
* to maintain cache coherency.
*/
- if (call->cm_server)
- afs_break_callbacks(call->cm_server, call->count, call->request);
+ if (call->server)
+ afs_break_callbacks(call->server, call->count, call->request);
afs_send_empty_reply(call);
afs_put_call(call);
@@ -376,10 +376,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
{
struct afs_call *call = container_of(work, struct afs_call, work);
- _enter("{%p}", call->cm_server);
+ _enter("{%p}", call->server);
- if (call->cm_server)
- afs_init_callback_state(call->cm_server);
+ if (call->server)
+ afs_init_callback_state(call->server);
afs_send_empty_reply(call);
afs_put_call(call);
_leave("");
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9a466be583d2..79d93a26759a 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
+#include "afs_fs.h"
#include "xdr_fs.h"
static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
@@ -102,8 +103,8 @@ struct afs_lookup_cookie {
bool found;
bool one_only;
unsigned short nr_fids;
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
+ struct inode **inodes;
+ struct afs_status_cb *statuses;
struct afs_fid fids[50];
};
@@ -638,12 +639,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
struct key *key)
{
struct afs_lookup_cookie *cookie;
- struct afs_cb_interest *cbi = NULL;
+ struct afs_cb_interest *dcbi, *cbi = NULL;
struct afs_super_info *as = dir->i_sb->s_fs_info;
- struct afs_iget_data data;
+ struct afs_status_cb *scb;
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct inode *inode = NULL;
+ struct afs_server *server;
+ struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+ struct inode *inode = NULL, *ti;
int ret, i;
_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
@@ -657,10 +660,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
read_seqlock_excl(&dvnode->cb_lock);
- if (dvnode->cb_interest &&
- dvnode->cb_interest->server &&
- test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags))
- cookie->one_only = true;
+ dcbi = rcu_dereference_protected(dvnode->cb_interest,
+ lockdep_is_held(&dvnode->cb_lock.lock));
+ if (dcbi) {
+ server = dcbi->server;
+ if (server &&
+ test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags))
+ cookie->one_only = true;
+ }
read_sequnlock_excl(&dvnode->cb_lock);
for (i = 0; i < 50; i++)
@@ -678,24 +685,43 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
goto out;
/* Check to see if we already have an inode for the primary fid. */
- data.volume = dvnode->volume;
- data.fid = cookie->fids[0];
- inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data);
+ iget_data.fid = cookie->fids[0];
+ iget_data.volume = dvnode->volume;
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = 0;
+ inode = ilookup5(dir->i_sb, cookie->fids[0].vnode,
+ afs_iget5_test, &iget_data);
if (inode)
goto out;
/* Need space for examining all the selected files */
inode = ERR_PTR(-ENOMEM);
- cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status),
- GFP_KERNEL);
+ cookie->statuses = kvcalloc(cookie->nr_fids, sizeof(struct afs_status_cb),
+ GFP_KERNEL);
if (!cookie->statuses)
goto out;
- cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback),
- GFP_KERNEL);
- if (!cookie->callbacks)
+ cookie->inodes = kcalloc(cookie->nr_fids, sizeof(struct inode *),
+ GFP_KERNEL);
+ if (!cookie->inodes)
goto out_s;
+ for (i = 1; i < cookie->nr_fids; i++) {
+ scb = &cookie->statuses[i];
+
+ /* Find any inodes that already exist and get their
+ * callback counters.
+ */
+ iget_data.fid = cookie->fids[i];
+ ti = ilookup5_nowait(dir->i_sb, iget_data.fid.vnode,
+ afs_iget5_test, &iget_data);
+ if (!IS_ERR_OR_NULL(ti)) {
+ vnode = AFS_FS_I(ti);
+ scb->cb_break = afs_calc_vnode_cb_break(vnode);
+ cookie->inodes[i] = ti;
+ }
+ }
+
/* Try FS.InlineBulkStatus first. Abort codes for the individual
* lookups contained therein are stored in the reply without aborting
* the whole operation.
@@ -704,7 +730,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
goto no_inline_bulk_status;
inode = ERR_PTR(-ERESTARTSYS);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
while (afs_select_fileserver(&fc)) {
if (test_bit(AFS_SERVER_FL_NO_IBULK,
&fc.cbi->server->flags)) {
@@ -712,11 +738,12 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
fc.ac.error = -ECONNABORTED;
break;
}
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = fc.cbi->server->cb_s_break;
afs_fs_inline_bulk_status(&fc,
afs_v2net(dvnode),
cookie->fids,
cookie->statuses,
- cookie->callbacks,
cookie->nr_fids, NULL);
}
@@ -737,15 +764,16 @@ no_inline_bulk_status:
* any of the lookups fails - so, for the moment, revert to
* FS.FetchStatus for just the primary fid.
*/
- cookie->nr_fids = 1;
inode = ERR_PTR(-ERESTARTSYS);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
while (afs_select_fileserver(&fc)) {
+ iget_data.cb_v_break = dvnode->volume->cb_v_break;
+ iget_data.cb_s_break = fc.cbi->server->cb_s_break;
+ scb = &cookie->statuses[0];
afs_fs_fetch_status(&fc,
afs_v2net(dvnode),
cookie->fids,
- cookie->statuses,
- cookie->callbacks,
+ scb,
NULL);
}
@@ -757,26 +785,36 @@ no_inline_bulk_status:
if (IS_ERR(inode))
goto out_c;
- for (i = 0; i < cookie->nr_fids; i++)
- cookie->statuses[i].abort_code = 0;
-
success:
/* Turn all the files into inodes and save the first one - which is the
* one we actually want.
*/
- if (cookie->statuses[0].abort_code != 0)
- inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code));
+ scb = &cookie->statuses[0];
+ if (scb->status.abort_code != 0)
+ inode = ERR_PTR(afs_abort_to_error(scb->status.abort_code));
for (i = 0; i < cookie->nr_fids; i++) {
- struct inode *ti;
+ struct afs_status_cb *scb = &cookie->statuses[i];
+
+ if (!scb->have_status && !scb->have_error)
+ continue;
+
+ if (cookie->inodes[i]) {
+ afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]),
+ scb->cb_break, NULL, scb);
+ continue;
+ }
- if (cookie->statuses[i].abort_code != 0)
+ if (scb->status.abort_code != 0)
continue;
- ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
- &cookie->statuses[i],
- &cookie->callbacks[i],
- cbi, dvnode);
+ iget_data.fid = cookie->fids[i];
+ ti = afs_iget(dir->i_sb, key, &iget_data, scb, cbi, dvnode);
+ if (!IS_ERR(ti))
+ afs_cache_permit(AFS_FS_I(ti), key,
+ 0 /* Assume vnode->cb_break is 0 */ +
+ iget_data.cb_v_break,
+ scb);
if (i == 0) {
inode = ti;
} else {
@@ -787,9 +825,13 @@ success:
out_c:
afs_put_cb_interest(afs_v2net(dvnode), cbi);
- kfree(cookie->callbacks);
+ if (cookie->inodes) {
+ for (i = 0; i < cookie->nr_fids; i++)
+ iput(cookie->inodes[i]);
+ kfree(cookie->inodes);
+ }
out_s:
- kfree(cookie->statuses);
+ kvfree(cookie->statuses);
out:
kfree(cookie);
return inode;
@@ -1114,9 +1156,8 @@ void afs_d_release(struct dentry *dentry)
*/
static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
struct dentry *new_dentry,
- struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_iget_data *new_data,
+ struct afs_status_cb *new_scb)
{
struct afs_vnode *vnode;
struct inode *inode;
@@ -1125,7 +1166,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
return;
inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
- newfid, newstatus, newcb, fc->cbi, fc->vnode);
+ new_data, new_scb, fc->cbi, fc->vnode);
if (IS_ERR(inode)) {
/* ENOMEM or EINTR at a really inconvenient time - just abandon
* the new directory on the server.
@@ -1136,22 +1177,29 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- afs_vnode_commit_status(fc, vnode, 0);
+ if (fc->ac.error == 0)
+ afs_cache_permit(vnode, fc->key, vnode->cb_break, new_scb);
d_instantiate(new_dentry, inode);
}
+static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
+ struct afs_iget_data *iget_data)
+{
+ iget_data->volume = fc->vnode->volume;
+ iget_data->cb_v_break = fc->vnode->volume->cb_v_break;
+ iget_data->cb_s_break = fc->cbi->server->cb_s_break;
+}
+
/*
* create a directory on an AFS filesystem
*/
static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- struct afs_file_status newstatus;
+ struct afs_iget_data iget_data;
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
- struct afs_callback newcb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
mode |= S_IFDIR;
@@ -1159,23 +1207,32 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
_enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
- &newfid, &newstatus, &newcb);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
@@ -1185,15 +1242,18 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1220,15 +1280,19 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
*/
static int afs_rmdir(struct inode *dir, struct dentry *dentry)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
dvnode->fid.vid, dvnode->fid.vnode, dentry);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
@@ -1250,14 +1314,16 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
- data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, true, scb);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
afs_dir_remove_subdir(dentry);
@@ -1272,6 +1338,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
error_key:
key_put(key);
error:
+ kfree(scb);
return ret;
}
@@ -1285,32 +1352,27 @@ error:
* However, if we didn't have a callback promise outstanding, or it was
* outstanding on a different server, then it won't break it either...
*/
-int afs_dir_remove_link(struct dentry *dentry, struct key *key,
- unsigned long d_version_before,
- unsigned long d_version_after)
+static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry,
+ struct key *key)
{
- bool dir_valid;
int ret = 0;
- /* There were no intervening changes on the server if the version
- * number we got back was incremented by exactly 1.
- */
- dir_valid = (d_version_after == d_version_before + 1);
-
if (d_really_is_positive(dentry)) {
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
/* Already done */
- } else if (dir_valid) {
+ } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ write_seqlock(&vnode->cb_lock);
drop_nlink(&vnode->vfs_inode);
if (vnode->vfs_inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ __afs_break_callback(vnode);
}
+ write_sequnlock(&vnode->cb_lock);
ret = 0;
} else {
- clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ afs_break_callback(vnode);
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
kdebug("AFS_VNODE_DELETED");
@@ -1331,11 +1393,10 @@ int afs_dir_remove_link(struct dentry *dentry, struct key *key,
static int afs_unlink(struct inode *dir, struct dentry *dentry)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
- unsigned long d_version = (unsigned long)dentry->d_fsdata;
bool need_rehash = false;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
@@ -1344,10 +1405,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (dentry->d_name.len >= AFSNAMEMAX)
return -ENAMETOOLONG;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
/* Try to make sure we have a callback promise on the victim. */
@@ -1374,30 +1440,34 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
spin_unlock(&dentry->d_lock);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ afs_dataversion_t data_version_2 = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
+ fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
!test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
- data_version);
+ &scb[0], &scb[1]);
if (fc.ac.error != -ECONNABORTED ||
fc.ac.abort_code != RXGEN_OPCODE)
continue;
set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
}
- afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
- data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
+ &data_version_2, &scb[1]);
ret = afs_end_vnode_operation(&fc);
- if (ret == 0)
- ret = afs_dir_remove_link(
- dentry, key, d_version,
- (unsigned long)dvnode->status.data_version);
+ if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
+ ret = afs_dir_remove_link(dvnode, dentry, key);
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_remove(dvnode, &dentry->d_name,
@@ -1409,6 +1479,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
@@ -1420,13 +1492,11 @@ error:
static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_file_status newstatus;
- struct afs_callback newcb;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
mode |= S_IFREG;
@@ -1444,17 +1514,26 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto error;
}
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error_scb;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
- &newfid, &newstatus, &newcb);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_create(&fc, dentry->d_name.name, mode,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, &newcb);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
@@ -1463,13 +1542,16 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
+ kfree(scb);
key_put(key);
_leave(" = 0");
return 0;
+error_scb:
+ kfree(scb);
error_key:
key_put(key);
error:
@@ -1485,15 +1567,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
struct dentry *dentry)
{
struct afs_fs_cursor fc;
- struct afs_vnode *dvnode, *vnode;
+ struct afs_status_cb *scb;
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
struct key *key;
- u64 data_version;
int ret;
- vnode = AFS_FS_I(d_inode(from));
- dvnode = AFS_FS_I(dir);
- data_version = dvnode->status.data_version;
-
_enter("{%llx:%llu},{%llx:%llu},{%pd}",
vnode->fid.vid, vnode->fid.vnode,
dvnode->fid.vid, dvnode->fid.vnode,
@@ -1503,14 +1582,21 @@ static int afs_link(struct dentry *from, struct inode *dir,
if (dentry->d_name.len >= AFSNAMEMAX)
goto error;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
goto error_key;
@@ -1519,11 +1605,14 @@ static int afs_link(struct dentry *from, struct inode *dir,
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
fc.cb_break_2 = afs_calc_vnode_cb_break(vnode);
- afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
+ afs_fs_link(&fc, vnode, dentry->d_name.name,
+ &scb[0], &scb[1]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break_2);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
+ NULL, &scb[1]);
ihold(&vnode->vfs_inode);
d_instantiate(dentry, &vnode->vfs_inode);
@@ -1540,11 +1629,14 @@ static int afs_link(struct dentry *from, struct inode *dir,
afs_edit_dir_for_link);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1557,12 +1649,11 @@ error:
static int afs_symlink(struct inode *dir, struct dentry *dentry,
const char *content)
{
+ struct afs_iget_data iget_data;
struct afs_fs_cursor fc;
- struct afs_file_status newstatus;
+ struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
- struct afs_fid newfid;
struct key *key;
- u64 data_version = dvnode->status.data_version;
int ret;
_enter("{%llx:%llu},{%pd},%s",
@@ -1577,24 +1668,32 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
if (strlen(content) >= AFSPATHMAX)
goto error;
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
- afs_fs_symlink(&fc, dentry->d_name.name,
- content, data_version,
- &newfid, &newstatus);
+ afs_prep_for_new_inode(&fc, &iget_data);
+ afs_fs_symlink(&fc, dentry->d_name.name, content,
+ &scb[0], &iget_data.fid, &scb[1]);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
- afs_vnode_new_inode(&fc, dentry, &newfid, &newstatus, NULL);
+ afs_check_for_remote_deletion(&fc, dvnode);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &data_version, &scb[0]);
+ afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_key;
@@ -1603,15 +1702,18 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
}
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_symlink);
key_put(key);
+ kfree(scb);
_leave(" = 0");
return 0;
error_key:
key_put(key);
+error_scb:
+ kfree(scb);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1626,11 +1728,11 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
unsigned int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
struct dentry *tmp = NULL, *rehash = NULL;
struct inode *new_inode;
struct key *key;
- u64 orig_data_version, new_data_version;
bool new_negative = d_is_negative(new_dentry);
int ret;
@@ -1644,8 +1746,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
vnode = AFS_FS_I(d_inode(old_dentry));
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
- orig_data_version = orig_dvnode->status.data_version;
- new_data_version = new_dvnode->status.data_version;
_enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
@@ -1653,10 +1753,15 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dvnode->fid.vid, new_dvnode->fid.vnode,
new_dentry);
+ ret = -ENOMEM;
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
key = afs_request_key(orig_dvnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
/* For non-directories, check whether the target is busy and if so,
@@ -1690,31 +1795,43 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry = tmp;
rehash = NULL;
new_negative = true;
- orig_data_version = orig_dvnode->status.data_version;
- new_data_version = new_dvnode->status.data_version;
}
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, orig_dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
+ afs_dataversion_t orig_data_version;
+ afs_dataversion_t new_data_version;
+ struct afs_status_cb *new_scb = &scb[1];
+
+ orig_data_version = orig_dvnode->status.data_version + 1;
+
if (orig_dvnode != new_dvnode) {
if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
goto error_rehash;
}
+ new_data_version = new_dvnode->status.data_version;
+ } else {
+ new_data_version = orig_data_version;
+ new_scb = &scb[0];
}
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode);
fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
- orig_data_version, new_data_version);
+ &scb[0], new_scb);
}
- afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
- afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2);
- if (orig_dvnode != new_dvnode)
+ afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break,
+ &orig_data_version, &scb[0]);
+ if (new_dvnode != orig_dvnode) {
+ afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2,
+ &new_data_version, &scb[1]);
mutex_unlock(&new_dvnode->io_lock);
+ }
ret = afs_end_vnode_operation(&fc);
if (ret < 0)
goto error_rehash;
@@ -1754,6 +1871,8 @@ error_tmp:
if (tmp)
dput(tmp);
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index f6f89fdab6b2..28f4aa015229 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -24,21 +24,28 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
struct key *key)
{
struct afs_fs_cursor fc;
- u64 dir_data_version = dvnode->status.data_version;
+ struct afs_status_cb *scb;
int ret = -ERESTARTSYS;
_enter("%pd,%pd", old, new);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
trace_afs_silly_rename(vnode, false);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
+ afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
afs_fs_rename(&fc, old->d_name.name,
dvnode, new->d_name.name,
- dir_data_version, dir_data_version);
+ scb, scb);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &dir_data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
@@ -64,6 +71,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
fsnotify_nameremove(old, 0);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -143,31 +151,37 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
struct dentry *dentry, struct key *key)
{
struct afs_fs_cursor fc;
- u64 dir_data_version = dvnode->status.data_version;
+ struct afs_status_cb *scb;
int ret = -ERESTARTSYS;
_enter("");
+ scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
trace_afs_silly_rename(vnode, true);
- if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+ if (afs_begin_vnode_operation(&fc, dvnode, key, false)) {
+ afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
!test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
- dir_data_version);
+ &scb[0], &scb[1]);
if (fc.ac.error != -ECONNABORTED ||
fc.ac.abort_code != RXGEN_OPCODE)
continue;
set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
}
- afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
- dir_data_version);
+ afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]);
}
- afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
+ &dir_data_version, &scb[0]);
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
drop_nlink(&vnode->vfs_inode);
@@ -182,6 +196,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
afs_edit_dir_for_unlink);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index a9ba81ddf154..af1689d1f32e 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
return 0;
}
- ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
+ ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL, false);
if (ret == -ENODATA)
ret = -EDESTADDRREQ;
return ret;
@@ -261,8 +261,7 @@ int afs_dynroot_populate(struct super_block *sb)
struct afs_net *net = afs_sb2net(sb);
int ret;
- if (mutex_lock_interruptible(&net->proc_cells_lock) < 0)
- return -ERESTARTSYS;
+ mutex_lock(&net->proc_cells_lock);
net->dynroot_sb = sb;
hlist_for_each_entry(cell, &net->proc_cells, proc_link) {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index e8d6619890a9..11e69c5fb7ab 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -170,11 +170,12 @@ int afs_release(struct inode *inode, struct file *file)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_file *af = file->private_data;
+ int ret = 0;
_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
if ((file->f_mode & FMODE_WRITE))
- return vfs_fsync(file, 0);
+ ret = vfs_fsync(file, 0);
file->private_data = NULL;
if (af->wb)
@@ -182,8 +183,8 @@ int afs_release(struct inode *inode, struct file *file)
key_put(af->key);
kfree(af);
afs_prune_wb_keys(vnode);
- _leave(" = 0");
- return 0;
+ _leave(" = %d", ret);
+ return ret;
}
/*
@@ -227,6 +228,7 @@ static void afs_file_readpage_read_complete(struct page *page,
int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *desc)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
int ret;
_enter("%s{%llx:%llu.%u},%x,,,",
@@ -236,15 +238,22 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_fetch_data(&fc, desc);
+ afs_fs_fetch_data(&fc, scb, desc);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
@@ -254,6 +263,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
&afs_v2net(vnode)->n_fetch_bytes);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -404,10 +414,10 @@ static int afs_readpage(struct file *file, struct page *page)
/*
* Make pages available as they're filled.
*/
-static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
+static void afs_readpages_page_done(struct afs_read *req)
{
#ifdef CONFIG_AFS_FSCACHE
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_vnode *vnode = req->vnode;
#endif
struct page *page = req->pages[req->index];
@@ -461,6 +471,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
return -ENOMEM;
refcount_set(&req->usage, 1);
+ req->vnode = vnode;
req->page_done = afs_readpages_page_done;
req->pos = first->index;
req->pos <<= PAGE_SHIFT;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index adc88eff7849..ed3ac03682d7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -41,9 +41,6 @@ void afs_lock_may_be_available(struct afs_vnode *vnode)
{
_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
- if (vnode->lock_state != AFS_VNODE_LOCK_WAITING_FOR_CB)
- return;
-
spin_lock(&vnode->lock);
if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
afs_next_locker(vnode, 0);
@@ -77,7 +74,7 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode)
*/
void afs_lock_op_done(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_vnode *vnode = call->lvnode;
if (call->error == 0) {
spin_lock(&vnode->lock);
@@ -185,6 +182,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode)
static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
afs_lock_type_t type)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
@@ -195,18 +193,23 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
vnode->fid.unique,
key_serial(key), type);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_set_lock(&fc, type);
+ afs_fs_set_lock(&fc, type, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -216,6 +219,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
*/
static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
@@ -226,18 +230,23 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
while (afs_select_current_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_extend_lock(&fc);
+ afs_fs_extend_lock(&fc, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -247,6 +256,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
*/
static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
@@ -257,18 +267,23 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
vnode->fid.unique,
key_serial(key));
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
while (afs_select_current_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_release_lock(&fc);
+ afs_fs_release_lock(&fc, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb);
ret = afs_end_vnode_operation(&fc);
}
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -736,7 +751,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
posix_test_lock(file, fl);
if (fl->fl_type == F_UNLCK) {
/* no local locks; consult the server */
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0)
goto error;
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index 5d3abde52a0f..9b7266209343 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -33,8 +33,8 @@ static bool afs_fs_probe_done(struct afs_server *server)
void afs_fileserver_probe_result(struct afs_call *call)
{
struct afs_addr_list *alist = call->alist;
- struct afs_server *server = call->reply[0];
- unsigned int server_index = (long)call->reply[1];
+ struct afs_server *server = call->server;
+ unsigned int server_index = call->server_index;
unsigned int index = call->addr_ix;
unsigned int rtt = UINT_MAX;
bool have_result = false;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 1296f5dc4c1e..48298408d6ac 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -60,78 +60,17 @@ static void xdr_dump_bad(const __be32 *bp)
}
/*
- * Update the core inode struct from a returned status record.
- */
-void afs_update_inode_from_status(struct afs_vnode *vnode,
- struct afs_file_status *status,
- const afs_dataversion_t *expected_version,
- u8 flags)
-{
- struct timespec64 t;
- umode_t mode;
-
- t = status->mtime_client;
- vnode->vfs_inode.i_ctime = t;
- vnode->vfs_inode.i_mtime = t;
- vnode->vfs_inode.i_atime = t;
-
- if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) {
- vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
- vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
- set_nlink(&vnode->vfs_inode, status->nlink);
-
- mode = vnode->vfs_inode.i_mode;
- mode &= ~S_IALLUGO;
- mode |= status->mode;
- barrier();
- vnode->vfs_inode.i_mode = mode;
- }
-
- if (!(flags & AFS_VNODE_NOT_YET_SET)) {
- if (expected_version &&
- *expected_version != status->data_version) {
- _debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
- (unsigned long long) status->data_version,
- vnode->fid.vid, vnode->fid.vnode,
- (unsigned long long) *expected_version);
- vnode->invalid_before = status->data_version;
- if (vnode->status.type == AFS_FTYPE_DIR) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- afs_stat_v(vnode, n_inval);
- } else {
- set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- }
- } else if (vnode->status.type == AFS_FTYPE_DIR) {
- /* Expected directory change is handled elsewhere so
- * that we can locally edit the directory and save on a
- * download.
- */
- if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- flags &= ~AFS_VNODE_DATA_CHANGED;
- }
- }
-
- if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) {
- inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- i_size_write(&vnode->vfs_inode, status->size);
- }
-}
-
-/*
* decode an AFSFetchStatus block
*/
-static int xdr_decode_AFSFetchStatus(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
+ struct afs_file_status *status = &scb->status;
bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
- u8 flags = 0;
abort_code = ntohl(xdr->abort_code);
@@ -144,6 +83,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
* case.
*/
status->abort_code = abort_code;
+ scb->have_error = true;
return 0;
}
@@ -161,44 +101,25 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
case AFS_FTYPE_SYMLINK:
- if (type != status->type &&
- vnode &&
- !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
- pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- status->type, type);
- goto bad;
- }
status->type = type;
break;
default:
goto bad;
}
-#define EXTRACT_M(FIELD) \
- do { \
- u32 x = ntohl(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
- EXTRACT_M(nlink);
- EXTRACT_M(author);
- EXTRACT_M(owner);
- EXTRACT_M(caller_access); /* call ticket dependent */
- EXTRACT_M(anon_access);
- EXTRACT_M(mode);
- EXTRACT_M(group);
+ status->nlink = ntohl(xdr->nlink);
+ status->author = ntohl(xdr->author);
+ status->owner = ntohl(xdr->owner);
+ status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */
+ status->anon_access = ntohl(xdr->anon_access);
+ status->mode = ntohl(xdr->mode) & S_IALLUGO;
+ status->group = ntohl(xdr->group);
+ status->lock_count = ntohl(xdr->lock_count);
status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
status->mtime_client.tv_nsec = 0;
status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
status->mtime_server.tv_nsec = 0;
- status->lock_count = ntohl(xdr->lock_count);
size = (u64)ntohl(xdr->size_lo);
size |= (u64)ntohl(xdr->size_hi) << 32;
@@ -206,25 +127,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
data_version = (u64)ntohl(xdr->data_version_lo);
data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
- if (data_version != status->data_version) {
- status->data_version = data_version;
- flags |= AFS_VNODE_DATA_CHANGED;
- }
-
- if (read_req) {
- read_req->data_version = data_version;
- read_req->file_size = size;
- }
+ status->data_version = data_version;
+ scb->have_status = true;
*_bp = (const void *)*_bp + sizeof(*xdr);
-
- if (vnode) {
- if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
- flags |= AFS_VNODE_NOT_YET_SET;
- afs_update_inode_from_status(vnode, status, expected_version,
- flags);
- }
-
return 0;
bad:
@@ -232,77 +138,22 @@ bad:
return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
}
-/*
- * Decode the file status. We need to lock the target vnode if we're going to
- * update its status so that stat() sees the attributes update atomically.
- */
-static int afs_decode_status(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
{
- int ret;
-
- if (!vnode)
- return xdr_decode_AFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
-
- write_seqlock(&vnode->cb_lock);
- ret = xdr_decode_AFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
- write_sequnlock(&vnode->cb_lock);
- return ret;
+ return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry;
}
-/*
- * decode an AFSCallBack block
- */
-static void xdr_decode_AFSCallBack(struct afs_call *call,
- struct afs_vnode *vnode,
- const __be32 **_bp)
+static void xdr_decode_AFSCallBack(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
- struct afs_cb_interest *old, *cbi = call->cbi;
+ struct afs_callback *cb = &scb->callback;
const __be32 *bp = *_bp;
- u32 cb_expiry;
-
- write_seqlock(&vnode->cb_lock);
-
- if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
- vnode->cb_version = ntohl(*bp++);
- cb_expiry = ntohl(*bp++);
- vnode->cb_type = ntohl(*bp++);
- vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
- old = vnode->cb_interest;
- if (old != call->cbi) {
- vnode->cb_interest = cbi;
- cbi = old;
- }
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- } else {
- bp += 3;
- }
- write_sequnlock(&vnode->cb_lock);
- call->cbi = cbi;
- *_bp = bp;
-}
-
-static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
-{
- return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC);
-}
-
-static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
- const __be32 **_bp,
- struct afs_callback *cb)
-{
- const __be32 *bp = *_bp;
-
- cb->version = ntohl(*bp++);
+ bp++; /* version */
cb->expires_at = xdr_decode_expiry(call, ntohl(*bp++));
- cb->type = ntohl(*bp++);
+ bp++; /* type */
+ scb->have_cb = true;
*_bp = bp;
}
@@ -395,7 +246,6 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
*/
static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -403,16 +253,13 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack(call, vnode, &bp);
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -431,8 +278,8 @@ static const struct afs_call_type afs_RXFSFetchStatus_vnode = {
/*
* fetch the status information for a file
*/
-int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
- bool new_inode)
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_volsync *volsync)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -440,7 +287,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+ return yfs_fs_fetch_file_status(fc, scb, volsync);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
@@ -453,10 +300,8 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = volsync;
- call->expected_version = new_inode ? 1 : vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
@@ -465,10 +310,10 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
bp[2] = htonl(vnode->fid.vnode);
bp[3] = htonl(vnode->fid.unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -478,8 +323,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
*/
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
const __be32 *bp;
unsigned int size;
int ret;
@@ -541,7 +385,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (req->offset == PAGE_SIZE) {
req->offset = 0;
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->index++;
if (req->remain > 0)
goto begin_page;
@@ -575,12 +419,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, req);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack(call, vnode, &bp);
- xdr_decode_AFSVolSync(&bp, call->reply[1]);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
+
+ req->data_version = call->out_scb->status.data_version;
+ req->file_size = call->out_scb->status.size;
call->unmarshall++;
@@ -593,7 +439,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
zero_user_segment(req->pages[req->index],
req->offset, PAGE_SIZE);
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->offset = 0;
}
@@ -603,7 +449,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
static void afs_fetch_data_destructor(struct afs_call *call)
{
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
afs_put_read(req);
afs_flat_call_destructor(call);
@@ -629,7 +475,9 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
/*
* fetch data from a very large file
*/
-static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
+static int afs_fs_fetch_data64(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -643,11 +491,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
@@ -661,9 +507,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
bp[7] = htonl(lower_32_bits(req->len));
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -671,7 +517,9 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
/*
* fetch data from a file
*/
-int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+int afs_fs_fetch_data(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -679,12 +527,12 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_data(fc, req);
+ return yfs_fs_fetch_data(fc, scb, req);
if (upper_32_bits(req->pos) ||
upper_32_bits(req->len) ||
upper_32_bits(req->pos + req->len))
- return afs_fs_fetch_data64(fc, req);
+ return afs_fs_fetch_data64(fc, scb, req);
_enter("");
@@ -693,11 +541,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
@@ -709,9 +555,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
bp[5] = htonl(lower_32_bits(req->len));
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -721,28 +567,24 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
*/
static int afs_deliver_fs_create_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply[1]);
- ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->out_fid);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -771,24 +613,23 @@ static const struct afs_call_type afs_RXFSMakeDir = {
int afs_fs_create(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, padsz;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
if (S_ISDIR(mode))
- return yfs_fs_make_dir(fc, name, mode, current_data_version,
- newfid, newstatus, newcb);
+ return yfs_fs_make_dir(fc, name, mode, dvnode_scb,
+ newfid, new_scb);
else
- return yfs_fs_create_file(fc, name, mode, current_data_version,
- newfid, newstatus, newcb);
+ return yfs_fs_create_file(fc, name, mode, dvnode_scb,
+ newfid, new_scb);
}
_enter("");
@@ -804,19 +645,16 @@ int afs_fs_create(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
- call->want_reply_time = true;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
- *bp++ = htonl(vnode->fid.vid);
- *bp++ = htonl(vnode->fid.vnode);
- *bp++ = htonl(vnode->fid.unique);
+ *bp++ = htonl(dvnode->fid.vid);
+ *bp++ = htonl(dvnode->fid.vnode);
+ *bp++ = htonl(dvnode->fid.unique);
*bp++ = htonl(namesz);
memcpy(bp, name, namesz);
bp = (void *) bp + namesz;
@@ -825,41 +663,38 @@ int afs_fs_create(struct afs_fs_cursor *fc,
bp = (void *) bp + padsz;
}
*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
- *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
+ *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
*bp++ = 0; /* segment size */
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * Deliver reply data to any operation that returns file status and volume
+ * Deliver reply data to any operation that returns directory status and volume
* sync.
*/
-static int afs_deliver_fs_status_and_vol(struct afs_call *call)
+static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -871,14 +706,14 @@ static int afs_deliver_fs_status_and_vol(struct afs_call *call)
static const struct afs_call_type afs_RXFSRemoveFile = {
.name = "FS.RemoveFile",
.op = afs_FS_RemoveFile,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_dir_status_and_vol,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSRemoveDir = {
.name = "FS.RemoveDir",
.op = afs_FS_RemoveDir,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_dir_status_and_vol,
.destructor = afs_flat_call_destructor,
};
@@ -886,7 +721,7 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
* remove a file or directory
*/
int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, bool isdir, u64 current_data_version)
+ const char *name, bool isdir, struct afs_status_cb *dvnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -895,7 +730,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+ return yfs_fs_remove(fc, vnode, name, isdir, dvnode_scb);
_enter("");
@@ -910,9 +745,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -930,6 +763,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -939,7 +773,6 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
*/
static int afs_deliver_fs_link(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
const __be32 *bp;
int ret;
@@ -951,14 +784,13 @@ static int afs_deliver_fs_link(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -978,7 +810,9 @@ static const struct afs_call_type afs_RXFSLink = {
* make a hard link
*/
int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name,
+ struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -987,7 +821,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_link(fc, vnode, name, current_data_version);
+ return yfs_fs_link(fc, vnode, name, dvnode_scb, vnode_scb);
_enter("");
@@ -1000,9 +834,8 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1023,6 +856,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &vnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1032,7 +866,6 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
*/
static int afs_deliver_fs_symlink(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1044,15 +877,14 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFid(&bp, call->reply[1]);
- ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_AFSFid(&bp, call->out_fid);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1074,19 +906,19 @@ static const struct afs_call_type afs_RXFSSymlink = {
int afs_fs_symlink(struct afs_fs_cursor *fc,
const char *name,
const char *contents,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, padsz, c_namesz, c_padsz;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_symlink(fc, name, contents, current_data_version,
- newfid, newstatus);
+ return yfs_fs_symlink(fc, name, contents, dvnode_scb,
+ newfid, new_scb);
_enter("");
@@ -1104,17 +936,16 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
*bp++ = htonl(FSSYMLINK);
- *bp++ = htonl(vnode->fid.vid);
- *bp++ = htonl(vnode->fid.vnode);
- *bp++ = htonl(vnode->fid.unique);
+ *bp++ = htonl(dvnode->fid.vid);
+ *bp++ = htonl(dvnode->fid.vnode);
+ *bp++ = htonl(dvnode->fid.unique);
*bp++ = htonl(namesz);
memcpy(bp, name, namesz);
bp = (void *) bp + namesz;
@@ -1130,14 +961,15 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
bp = (void *) bp + c_padsz;
}
*bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME);
- *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */
+ *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */
*bp++ = 0; /* owner */
*bp++ = 0; /* group */
*bp++ = htonl(S_IRWXUGO); /* unix mode */
*bp++ = 0; /* segment size */
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1147,29 +979,24 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
*/
static int afs_deliver_fs_rename(struct afs_call *call)
{
- struct afs_vnode *orig_dvnode = call->reply[0], *new_dvnode = call->reply[1];
const __be32 *bp;
int ret;
- _enter("{%u}", call->unmarshall);
-
ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (new_dvnode != orig_dvnode) {
- ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
- &call->expected_version_2, NULL);
+ if (call->out_dir_scb != call->out_scb) {
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
}
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1186,14 +1013,14 @@ static const struct afs_call_type afs_RXFSRename = {
};
/*
- * create a symbolic link
+ * Rename/move a file or directory.
*/
int afs_fs_rename(struct afs_fs_cursor *fc,
const char *orig_name,
struct afs_vnode *new_dvnode,
const char *new_name,
- u64 current_orig_data_version,
- u64 current_new_data_version)
+ struct afs_status_cb *orig_dvnode_scb,
+ struct afs_status_cb *new_dvnode_scb)
{
struct afs_vnode *orig_dvnode = fc->vnode;
struct afs_call *call;
@@ -1204,8 +1031,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
return yfs_fs_rename(fc, orig_name,
new_dvnode, new_name,
- current_orig_data_version,
- current_new_data_version);
+ orig_dvnode_scb,
+ new_dvnode_scb);
_enter("");
@@ -1225,10 +1052,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = orig_dvnode;
- call->reply[1] = new_dvnode;
- call->expected_version = current_orig_data_version + 1;
- call->expected_version_2 = current_new_data_version + 1;
+ call->out_dir_scb = orig_dvnode_scb;
+ call->out_scb = new_dvnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1257,6 +1082,7 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1266,7 +1092,6 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
*/
static int afs_deliver_fs_store_data(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1278,13 +1103,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
-
- afs_pages_written_back(vnode, call);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1314,7 +1136,8 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
struct address_space *mapping,
pgoff_t first, pgoff_t last,
unsigned offset, unsigned to,
- loff_t size, loff_t pos, loff_t i_size)
+ loff_t size, loff_t pos, loff_t i_size,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1332,13 +1155,12 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1362,6 +1184,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
*bp++ = htonl((u32) i_size);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1371,7 +1194,8 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
*/
int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
+ unsigned offset, unsigned to,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1380,7 +1204,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+ return yfs_fs_store_data(fc, mapping, first, last, offset, to, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
@@ -1401,7 +1225,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32)
return afs_fs_store_data64(fc, mapping, first, last, offset, to,
- size, pos, i_size);
+ size, pos, i_size, scb);
call = afs_alloc_flat_call(net, &afs_RXFSStoreData,
(4 + 6 + 3) * 4,
@@ -1411,13 +1235,12 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1439,6 +1262,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1448,7 +1272,6 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
*/
static int afs_deliver_fs_store_status(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1460,11 +1283,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1498,7 +1320,8 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = {
* set the attributes on a very large file, using FS.StoreData rather than
* FS.StoreStatus so as to alter the file size also
*/
-static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
+static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1517,8 +1340,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1538,6 +1360,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1546,7 +1369,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
* set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
* so as to alter the file size also
*/
-static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1558,7 +1382,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
ASSERT(attr->ia_valid & ATTR_SIZE);
if (attr->ia_size >> 32)
- return afs_fs_setattr_size64(fc, attr);
+ return afs_fs_setattr_size64(fc, attr, scb);
call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status,
(4 + 6 + 3) * 4,
@@ -1567,8 +1391,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1585,6 +1408,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1593,7 +1417,8 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
* set the attributes on a file, using FS.StoreData if there's a change in file
* size, and FS.StoreStatus otherwise
*/
-int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1601,10 +1426,10 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_setattr(fc, attr);
+ return yfs_fs_setattr(fc, attr, scb);
if (attr->ia_valid & ATTR_SIZE)
- return afs_fs_setattr_size(fc, attr);
+ return afs_fs_setattr_size(fc, attr, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
@@ -1616,8 +1441,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1630,6 +1454,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1659,7 +1484,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
+ xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
@@ -1675,7 +1500,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_volname_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the volume name */
@@ -1685,7 +1510,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
@@ -1703,7 +1528,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_offline_msg_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the offline message */
@@ -1713,7 +1538,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("offline '%s'", p);
@@ -1732,7 +1557,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_motd_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the message of the day */
@@ -1742,7 +1567,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("motd '%s'", p);
@@ -1757,23 +1582,13 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
}
/*
- * destroy an FS.GetVolumeStatus call
- */
-static void afs_get_volume_status_call_destructor(struct afs_call *call)
-{
- kfree(call->reply[2]);
- call->reply[2] = NULL;
- afs_flat_call_destructor(call);
-}
-
-/*
* FS.GetVolumeStatus operation type
*/
static const struct afs_call_type afs_RXFSGetVolumeStatus = {
.name = "FS.GetVolumeStatus",
.op = afs_FS_GetVolumeStatus,
.deliver = afs_deliver_fs_get_volume_status,
- .destructor = afs_get_volume_status_call_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -1786,27 +1601,19 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
struct afs_call *call;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- void *tmpbuf;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
return yfs_fs_get_volume_status(fc, vs);
_enter("");
- tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
- if (!tmpbuf)
- return -ENOMEM;
-
- call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, 12 * 4);
- if (!call) {
- kfree(tmpbuf);
+ call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4,
+ max(12 * 4, AFSOPAQUEMAX + 1));
+ if (!call)
return -ENOMEM;
- }
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = vs;
- call->reply[2] = tmpbuf;
+ call->out_volstatus = vs;
/* marshall the parameters */
bp = call->request;
@@ -1815,6 +1622,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1835,7 +1643,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- /* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1876,7 +1684,8 @@ static const struct afs_call_type afs_RXFSReleaseLock = {
/*
* Set a lock on a file
*/
-int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1884,7 +1693,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_set_lock(fc, type);
+ return yfs_fs_set_lock(fc, type, scb);
_enter("");
@@ -1893,8 +1702,8 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1906,6 +1715,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_calli(call, &vnode->fid, type);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1913,7 +1723,7 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
/*
* extend a lock on a file
*/
-int afs_fs_extend_lock(struct afs_fs_cursor *fc)
+int afs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1921,7 +1731,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_extend_lock(fc);
+ return yfs_fs_extend_lock(fc, scb);
_enter("");
@@ -1930,8 +1740,8 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1942,6 +1752,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1949,7 +1760,7 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
/*
* release a lock on a file
*/
-int afs_fs_release_lock(struct afs_fs_cursor *fc)
+int afs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1957,7 +1768,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_release_lock(fc);
+ return yfs_fs_release_lock(fc, scb);
_enter("");
@@ -1966,7 +1777,8 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1977,6 +1789,7 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -2071,14 +1884,6 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
return 0;
}
-static void afs_destroy_fs_get_capabilities(struct afs_call *call)
-{
- struct afs_server *server = call->reply[0];
-
- afs_put_server(call->net, server);
- afs_flat_call_destructor(call);
-}
-
/*
* FS.GetCapabilities operation type
*/
@@ -2087,7 +1892,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
- .destructor = afs_destroy_fs_get_capabilities,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -2110,11 +1915,11 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
return ERR_PTR(-ENOMEM);
call->key = key;
- call->reply[0] = afs_get_server(server);
- call->reply[1] = (void *)(long)server_index;
+ call->server = afs_get_server(server);
+ call->server_index = server_index;
call->upgrade = true;
- call->want_reply_time = true;
call->async = true;
+ call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
bp = call->request;
@@ -2131,10 +1936,6 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net,
*/
static int afs_deliver_fs_fetch_status(struct afs_call *call)
{
- struct afs_file_status *status = call->reply[1];
- struct afs_callback *callback = call->reply[2];
- struct afs_volsync *volsync = call->reply[3];
- struct afs_fid *fid = call->reply[0];
const __be32 *bp;
int ret;
@@ -2142,16 +1943,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", fid->vid, fid->vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = afs_decode_status(call, &bp, status, NULL,
- &call->expected_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSCallBack_raw(call, &bp, callback);
- xdr_decode_AFSVolSync(&bp, volsync);
+ xdr_decode_AFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -2173,15 +1971,14 @@ static const struct afs_call_type afs_RXFSFetchStatus = {
int afs_fs_fetch_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fid,
- struct afs_file_status *status,
- struct afs_callback *callback,
+ struct afs_status_cb *scb,
struct afs_volsync *volsync)
{
struct afs_call *call;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+ return yfs_fs_fetch_status(fc, net, fid, scb, volsync);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), fid->vid, fid->vnode);
@@ -2193,12 +1990,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
}
call->key = fc->key;
- call->reply[0] = fid;
- call->reply[1] = status;
- call->reply[2] = callback;
- call->reply[3] = volsync;
- call->expected_version = 1; /* vnode->status.data_version */
- call->want_reply_time = true;
+ call->out_fid = fid;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
@@ -2207,9 +2001,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
bp[2] = htonl(fid->vnode);
bp[3] = htonl(fid->unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -2219,9 +2013,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
*/
static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
{
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_status_cb *scb;
const __be32 *bp;
u32 tmp;
int ret;
@@ -2260,10 +2052,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- statuses = call->reply[1];
- ret = afs_decode_status(call, &bp, &statuses[call->count],
- call->count == 0 ? vnode : NULL,
- NULL, NULL);
+ scb = &call->out_scb[call->count];
+ ret = xdr_decode_AFSFetchStatus(&bp, call, scb);
if (ret < 0)
return ret;
@@ -2302,13 +2092,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
_debug("unmarshall CB array");
bp = call->buffer;
- callbacks = call->reply[2];
- callbacks[call->count].version = ntohl(bp[0]);
- callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
- callbacks[call->count].type = ntohl(bp[2]);
- statuses = call->reply[1];
- if (call->count == 0 && vnode && statuses[0].abort_code == 0)
- xdr_decode_AFSCallBack(call, vnode, &bp);
+ scb = &call->out_scb[call->count];
+ xdr_decode_AFSCallBack(&bp, call, scb);
call->count++;
if (call->count < call->count2)
goto more_cbs;
@@ -2323,7 +2108,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_AFSVolSync(&bp, call->reply[3]);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
@@ -2351,8 +2136,7 @@ static const struct afs_call_type afs_RXFSInlineBulkStatus = {
int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fids,
- struct afs_file_status *statuses,
- struct afs_callback *callbacks,
+ struct afs_status_cb *statuses,
unsigned int nr_fids,
struct afs_volsync *volsync)
{
@@ -2361,7 +2145,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
int i;
if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
- return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+ return yfs_fs_inline_bulk_status(fc, net, fids, statuses,
nr_fids, volsync);
_enter(",%x,{%llx:%llu},%u",
@@ -2376,12 +2160,9 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = statuses;
- call->reply[2] = callbacks;
- call->reply[3] = volsync;
+ call->out_scb = statuses;
+ call->out_volsync = volsync;
call->count2 = nr_fids;
- call->want_reply_time = true;
/* marshall the parameters */
bp = call->request;
@@ -2393,9 +2174,9 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
*bp++ = htonl(fids[i].unique);
}
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &fids[0]);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -2405,7 +2186,6 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
*/
static int afs_deliver_fs_fetch_acl(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[1];
struct afs_acl *acl;
const __be32 *bp;
unsigned int size;
@@ -2430,7 +2210,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL);
if (!acl)
return -ENOMEM;
- call->reply[0] = acl;
+ call->ret_acl = acl;
acl->size = call->count2;
afs_extract_begin(call, acl->data, size);
call->unmarshall++;
@@ -2451,11 +2231,10 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = afs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, NULL);
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_AFSVolSync(&bp, call->reply[2]);
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
@@ -2469,7 +2248,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
static void afs_destroy_fs_fetch_acl(struct afs_call *call)
{
- kfree(call->reply[0]);
+ kfree(call->ret_acl);
afs_flat_call_destructor(call);
}
@@ -2486,7 +2265,8 @@ static const struct afs_call_type afs_RXFSFetchACL = {
/*
* Fetch the ACL for a file.
*/
-struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
+struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -2503,10 +2283,9 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
}
call->key = fc->key;
- call->reply[0] = NULL;
- call->reply[1] = vnode;
- call->reply[2] = NULL; /* volsync */
- call->ret_reply0 = true;
+ call->ret_acl = NULL;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
@@ -2515,7 +2294,6 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
bp[2] = htonl(vnode->fid.vnode);
bp[3] = htonl(vnode->fid.unique);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
afs_make_call(&fc->ac, call, GFP_KERNEL);
@@ -2523,19 +2301,43 @@ struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc)
}
/*
+ * Deliver reply data to any operation that returns file status and volume
+ * sync.
+ */
+static int afs_deliver_fs_file_status_and_vol(struct afs_call *call)
+{
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
+ xdr_decode_AFSVolSync(&bp, call->out_volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
* FS.StoreACL operation type
*/
static const struct afs_call_type afs_RXFSStoreACL = {
.name = "FS.StoreACL",
.op = afs_FS_StoreACL,
- .deliver = afs_deliver_fs_status_and_vol,
+ .deliver = afs_deliver_fs_file_status_and_vol,
.destructor = afs_flat_call_destructor,
};
/*
* Fetch the ACL for a file.
*/
-int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -2555,8 +2357,8 @@ int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl)
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[2] = NULL; /* volsync */
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c4652b42d545..b42d9d09669c 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -23,6 +23,7 @@
#include <linux/namei.h>
#include <linux/iversion.h>
#include "internal.h"
+#include "afs_fs.h"
static const struct inode_operations afs_symlink_inode_operations = {
.get_link = page_get_link,
@@ -58,38 +59,50 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
- struct afs_vnode *parent_vnode)
+ struct afs_cb_interest *cbi,
+ struct afs_vnode *parent_vnode,
+ struct afs_status_cb *scb)
{
+ struct afs_cb_interest *old_cbi = NULL;
+ struct afs_file_status *status = &scb->status;
struct inode *inode = AFS_VNODE_TO_I(vnode);
+ struct timespec64 t;
_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
- vnode->status.type,
- vnode->status.nlink,
- (unsigned long long) vnode->status.size,
- vnode->status.data_version,
- vnode->status.mode);
+ status->type,
+ status->nlink,
+ (unsigned long long) status->size,
+ status->data_version,
+ status->mode);
- read_seqlock_excl(&vnode->cb_lock);
+ write_seqlock(&vnode->cb_lock);
- afs_update_inode_from_status(vnode, &vnode->status, NULL,
- AFS_VNODE_NOT_YET_SET);
+ vnode->status = *status;
- switch (vnode->status.type) {
+ t = status->mtime_client;
+ inode->i_ctime = t;
+ inode->i_mtime = t;
+ inode->i_atime = t;
+ inode->i_uid = make_kuid(&init_user_ns, status->owner);
+ inode->i_gid = make_kgid(&init_user_ns, status->group);
+ set_nlink(&vnode->vfs_inode, status->nlink);
+
+ switch (status->type) {
case AFS_FTYPE_FILE:
- inode->i_mode = S_IFREG | vnode->status.mode;
+ inode->i_mode = S_IFREG | status->mode;
inode->i_op = &afs_file_inode_operations;
inode->i_fop = &afs_file_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
break;
case AFS_FTYPE_DIR:
- inode->i_mode = S_IFDIR | vnode->status.mode;
+ inode->i_mode = S_IFDIR | status->mode;
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
- if ((vnode->status.mode & 0777) == 0644) {
+ if ((status->mode & 0777) == 0644) {
inode->i_flags |= S_AUTOMOUNT;
set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
@@ -99,7 +112,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
inode->i_fop = &afs_mntpt_file_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
} else {
- inode->i_mode = S_IFLNK | vnode->status.mode;
+ inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
}
@@ -107,7 +120,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
break;
default:
dump_vnode(vnode, parent_vnode);
- read_sequnlock_excl(&vnode->cb_lock);
+ write_sequnlock(&vnode->cb_lock);
return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
@@ -116,17 +129,175 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
* for consistency with other AFS clients.
*/
inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
- vnode->invalid_before = vnode->status.data_version;
+ i_size_write(&vnode->vfs_inode, status->size);
+
+ vnode->invalid_before = status->data_version;
+ inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+
+ if (!scb->have_cb) {
+ /* it's a symlink we just created (the fileserver
+ * didn't give us a callback) */
+ vnode->cb_expires_at = ktime_get_real_seconds();
+ } else {
+ vnode->cb_expires_at = scb->callback.expires_at;
+ old_cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (cbi != old_cbi)
+ rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi));
+ else
+ old_cbi = NULL;
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
- read_sequnlock_excl(&vnode->cb_lock);
+ write_sequnlock(&vnode->cb_lock);
+ afs_put_cb_interest(afs_v2net(vnode), old_cbi);
return 0;
}
/*
+ * Update the core inode struct from a returned status record.
+ */
+static void afs_apply_status(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ struct afs_status_cb *scb,
+ const afs_dataversion_t *expected_version)
+{
+ struct afs_file_status *status = &scb->status;
+ struct timespec64 t;
+ umode_t mode;
+ bool data_changed = false;
+
+ BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags));
+
+ if (status->type != vnode->status.type) {
+ pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ status->type, vnode->status.type);
+ afs_protocol_error(NULL, -EBADMSG, afs_eproto_bad_status);
+ return;
+ }
+
+ if (status->nlink != vnode->status.nlink)
+ set_nlink(&vnode->vfs_inode, status->nlink);
+
+ if (status->owner != vnode->status.owner)
+ vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
+
+ if (status->group != vnode->status.group)
+ vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
+
+ if (status->mode != vnode->status.mode) {
+ mode = vnode->vfs_inode.i_mode;
+ mode &= ~S_IALLUGO;
+ mode |= status->mode;
+ WRITE_ONCE(vnode->vfs_inode.i_mode, mode);
+ }
+
+ t = status->mtime_client;
+ vnode->vfs_inode.i_ctime = t;
+ vnode->vfs_inode.i_mtime = t;
+ vnode->vfs_inode.i_atime = t;
+
+ if (vnode->status.data_version != status->data_version)
+ data_changed = true;
+
+ vnode->status = *status;
+
+ if (expected_version &&
+ *expected_version != status->data_version) {
+ kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s",
+ (unsigned long long) status->data_version,
+ vnode->fid.vid, vnode->fid.vnode,
+ (unsigned long long) *expected_version,
+ fc->type ? fc->type->name : "???");
+ vnode->invalid_before = status->data_version;
+ if (vnode->status.type == AFS_FTYPE_DIR) {
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_stat_v(vnode, n_inval);
+ } else {
+ set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+ }
+ } else if (vnode->status.type == AFS_FTYPE_DIR) {
+ /* Expected directory change is handled elsewhere so
+ * that we can locally edit the directory and save on a
+ * download.
+ */
+ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ data_changed = false;
+ }
+
+ if (data_changed) {
+ inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+ i_size_write(&vnode->vfs_inode, status->size);
+ }
+}
+
+/*
+ * Apply a callback to a vnode.
+ */
+static void afs_apply_callback(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ struct afs_status_cb *scb,
+ unsigned int cb_break)
+{
+ struct afs_cb_interest *old;
+ struct afs_callback *cb = &scb->callback;
+
+ if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) {
+ vnode->cb_expires_at = cb->expires_at;
+ old = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (old != fc->cbi) {
+ rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi));
+ afs_put_cb_interest(afs_v2net(vnode), old);
+ }
+ set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+ }
+}
+
+/*
+ * Apply the received status and callback to an inode all in the same critical
+ * section to avoid races with afs_validate().
+ */
+void afs_vnode_commit_status(struct afs_fs_cursor *fc,
+ struct afs_vnode *vnode,
+ unsigned int cb_break,
+ const afs_dataversion_t *expected_version,
+ struct afs_status_cb *scb)
+{
+ if (fc->ac.error != 0)
+ return;
+
+ write_seqlock(&vnode->cb_lock);
+
+ if (scb->have_error) {
+ if (scb->status.abort_code == VNOVNODE) {
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ clear_nlink(&vnode->vfs_inode);
+ __afs_break_callback(vnode);
+ }
+ } else {
+ if (scb->have_status)
+ afs_apply_status(fc, vnode, scb, expected_version);
+ if (scb->have_cb)
+ afs_apply_callback(fc, vnode, scb, cb_break);
+ }
+
+ write_sequnlock(&vnode->cb_lock);
+
+ if (fc->ac.error == 0 && scb->have_status)
+ afs_cache_permit(vnode, fc->key, cb_break, scb);
+}
+
+/*
* Fetch file status from the volume.
*/
-int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
+ afs_access_t *_caller_access)
{
+ struct afs_status_cb *scb;
struct afs_fs_cursor fc;
int ret;
@@ -135,18 +306,38 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
vnode->flags);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ return -ENOMEM;
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_fetch_file_status(&fc, NULL, new_inode);
+ afs_fs_fetch_file_status(&fc, scb, NULL);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ if (fc.error) {
+ /* Do nothing. */
+ } else if (is_new) {
+ ret = afs_inode_init_from_status(vnode, key, fc.cbi,
+ NULL, scb);
+ fc.error = ret;
+ if (ret == 0)
+ afs_cache_permit(vnode, key, fc.cb_break, scb);
+ } else {
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
+ }
+ afs_check_for_remote_deletion(&fc, vnode);
ret = afs_end_vnode_operation(&fc);
}
+ if (ret == 0 && _caller_access)
+ *_caller_access = scb->status.caller_access;
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -156,10 +347,10 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
*/
int afs_iget5_test(struct inode *inode, void *opaque)
{
- struct afs_iget_data *data = opaque;
+ struct afs_iget_data *iget_data = opaque;
struct afs_vnode *vnode = AFS_FS_I(inode);
- return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
+ return memcmp(&vnode->fid, &iget_data->fid, sizeof(iget_data->fid)) == 0;
}
/*
@@ -177,17 +368,19 @@ static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque)
*/
static int afs_iget5_set(struct inode *inode, void *opaque)
{
- struct afs_iget_data *data = opaque;
+ struct afs_iget_data *iget_data = opaque;
struct afs_vnode *vnode = AFS_FS_I(inode);
- vnode->fid = data->fid;
- vnode->volume = data->volume;
+ vnode->fid = iget_data->fid;
+ vnode->volume = iget_data->volume;
+ vnode->cb_v_break = iget_data->cb_v_break;
+ vnode->cb_s_break = iget_data->cb_s_break;
/* YFS supports 96-bit vnode IDs, but Linux only supports
* 64-bit inode numbers.
*/
- inode->i_ino = data->fid.vnode;
- inode->i_generation = data->fid.unique;
+ inode->i_ino = iget_data->fid.vnode;
+ inode->i_generation = iget_data->fid.unique;
return 0;
}
@@ -197,38 +390,42 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
*/
struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
{
- struct afs_iget_data data;
struct afs_super_info *as;
struct afs_vnode *vnode;
struct inode *inode;
static atomic_t afs_autocell_ino;
+ struct afs_iget_data iget_data = {
+ .cb_v_break = 0,
+ .cb_s_break = 0,
+ };
+
_enter("");
as = sb->s_fs_info;
if (as->volume) {
- data.volume = as->volume;
- data.fid.vid = as->volume->vid;
+ iget_data.volume = as->volume;
+ iget_data.fid.vid = as->volume->vid;
}
if (root) {
- data.fid.vnode = 1;
- data.fid.unique = 1;
+ iget_data.fid.vnode = 1;
+ iget_data.fid.unique = 1;
} else {
- data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
- data.fid.unique = 0;
+ iget_data.fid.vnode = atomic_inc_return(&afs_autocell_ino);
+ iget_data.fid.unique = 0;
}
- inode = iget5_locked(sb, data.fid.vnode,
+ inode = iget5_locked(sb, iget_data.fid.vnode,
afs_iget5_pseudo_dir_test, afs_iget5_set,
- &data);
+ &iget_data);
if (!inode) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
- inode, inode->i_ino, data.fid.vid, data.fid.vnode,
- data.fid.unique);
+ inode, inode->i_ino, iget_data.fid.vid, iget_data.fid.vnode,
+ iget_data.fid.unique);
vnode = AFS_FS_I(inode);
@@ -299,23 +496,24 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
* inode retrieval
*/
struct inode *afs_iget(struct super_block *sb, struct key *key,
- struct afs_fid *fid, struct afs_file_status *status,
- struct afs_callback *cb, struct afs_cb_interest *cbi,
+ struct afs_iget_data *iget_data,
+ struct afs_status_cb *scb,
+ struct afs_cb_interest *cbi,
struct afs_vnode *parent_vnode)
{
- struct afs_iget_data data = { .fid = *fid };
struct afs_super_info *as;
struct afs_vnode *vnode;
+ struct afs_fid *fid = &iget_data->fid;
struct inode *inode;
int ret;
_enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
as = sb->s_fs_info;
- data.volume = as->volume;
+ iget_data->volume = as->volume;
inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set,
- &data);
+ iget_data);
if (!inode) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
@@ -332,43 +530,25 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
return inode;
}
- if (!status) {
+ if (!scb) {
/* it's a remotely extant inode */
- ret = afs_fetch_status(vnode, key, true);
+ ret = afs_fetch_status(vnode, key, true, NULL);
if (ret < 0)
goto bad_inode;
} else {
- /* it's an inode we just created */
- memcpy(&vnode->status, status, sizeof(vnode->status));
-
- if (!cb) {
- /* it's a symlink we just created (the fileserver
- * didn't give us a callback) */
- vnode->cb_version = 0;
- vnode->cb_type = 0;
- vnode->cb_expires_at = ktime_get();
- } else {
- vnode->cb_version = cb->version;
- vnode->cb_type = cb->type;
- vnode->cb_expires_at = cb->expires_at;
- vnode->cb_interest = afs_get_cb_interest(cbi);
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- }
-
- vnode->cb_expires_at += ktime_get_real_seconds();
+ ret = afs_inode_init_from_status(vnode, key, cbi, parent_vnode,
+ scb);
+ if (ret < 0)
+ goto bad_inode;
}
- ret = afs_inode_init_from_status(vnode, key, parent_vnode);
- if (ret < 0)
- goto bad_inode;
-
afs_get_inode_cache(vnode);
/* success */
clear_bit(AFS_VNODE_UNSET, &vnode->flags);
inode->i_flags |= S_NOATIME;
unlock_new_inode(inode);
- _leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
+ _leave(" = %p", inode);
return inode;
/* failure */
@@ -400,6 +580,66 @@ void afs_zap_data(struct afs_vnode *vnode)
}
/*
+ * Check the validity of a vnode/inode.
+ */
+bool afs_check_validity(struct afs_vnode *vnode)
+{
+ struct afs_cb_interest *cbi;
+ struct afs_server *server;
+ struct afs_volume *volume = vnode->volume;
+ time64_t now = ktime_get_real_seconds();
+ bool valid, need_clear = false;
+ unsigned int cb_break, cb_s_break, cb_v_break;
+ int seq = 0;
+
+ do {
+ read_seqbegin_or_lock(&vnode->cb_lock, &seq);
+ cb_v_break = READ_ONCE(volume->cb_v_break);
+ cb_break = vnode->cb_break;
+
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
+ cbi = rcu_dereference(vnode->cb_interest);
+ server = rcu_dereference(cbi->server);
+ cb_s_break = READ_ONCE(server->cb_s_break);
+
+ if (vnode->cb_s_break != cb_s_break ||
+ vnode->cb_v_break != cb_v_break) {
+ vnode->cb_s_break = cb_s_break;
+ vnode->cb_v_break = cb_v_break;
+ need_clear = true;
+ valid = false;
+ } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+ need_clear = true;
+ valid = false;
+ } else if (vnode->cb_expires_at - 10 <= now) {
+ need_clear = true;
+ valid = false;
+ } else {
+ valid = true;
+ }
+ } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+ valid = true;
+ } else {
+ vnode->cb_v_break = cb_v_break;
+ valid = false;
+ }
+
+ } while (need_seqretry(&vnode->cb_lock, seq));
+
+ done_seqretry(&vnode->cb_lock, seq);
+
+ if (need_clear) {
+ write_seqlock(&vnode->cb_lock);
+ if (cb_break == vnode->cb_break)
+ __afs_break_callback(vnode);
+ write_sequnlock(&vnode->cb_lock);
+ valid = false;
+ }
+
+ return valid;
+}
+
+/*
* validate a vnode/inode
* - there are several things we need to check
* - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
@@ -410,7 +650,6 @@ void afs_zap_data(struct afs_vnode *vnode)
*/
int afs_validate(struct afs_vnode *vnode, struct key *key)
{
- time64_t now = ktime_get_real_seconds();
bool valid;
int ret;
@@ -418,36 +657,9 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
vnode->fid.vid, vnode->fid.vnode, vnode->flags,
key_serial(key));
- /* Quickly check the callback state. Ideally, we'd use read_seqbegin
- * here, but we have no way to pass the net namespace to the RCU
- * cleanup for the server record.
- */
- read_seqlock_excl(&vnode->cb_lock);
-
- if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
- if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break ||
- vnode->cb_v_break != vnode->volume->cb_v_break) {
- vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
- vnode->cb_v_break = vnode->volume->cb_v_break;
- valid = false;
- } else if (vnode->status.type == AFS_FTYPE_DIR &&
- (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) ||
- vnode->cb_expires_at - 10 <= now)) {
- valid = false;
- } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) ||
- vnode->cb_expires_at - 10 <= now) {
- valid = false;
- } else {
- valid = true;
- }
- } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- valid = true;
- } else {
- vnode->cb_v_break = vnode->volume->cb_v_break;
- valid = false;
- }
-
- read_sequnlock_excl(&vnode->cb_lock);
+ rcu_read_lock();
+ valid = afs_check_validity(vnode);
+ rcu_read_unlock();
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
clear_nlink(&vnode->vfs_inode);
@@ -463,7 +675,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
* access */
if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
_debug("not promised");
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, NULL);
if (ret < 0) {
if (ret == -ENOENT) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -534,6 +746,7 @@ int afs_drop_inode(struct inode *inode)
*/
void afs_evict_inode(struct inode *inode)
{
+ struct afs_cb_interest *cbi;
struct afs_vnode *vnode;
vnode = AFS_FS_I(inode);
@@ -550,10 +763,14 @@ void afs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
- if (vnode->cb_interest) {
- afs_put_cb_interest(afs_i2net(inode), vnode->cb_interest);
- vnode->cb_interest = NULL;
+ write_seqlock(&vnode->cb_lock);
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->cb_lock.lock));
+ if (cbi) {
+ afs_put_cb_interest(afs_i2net(inode), cbi);
+ rcu_assign_pointer(vnode->cb_interest, NULL);
}
+ write_sequnlock(&vnode->cb_lock);
while (!list_empty(&vnode->wb_keys)) {
struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
@@ -573,6 +790,7 @@ void afs_evict_inode(struct inode *inode)
}
#endif
+ afs_prune_wb_keys(vnode);
afs_put_permits(rcu_access_pointer(vnode->permit_cache));
key_put(vnode->silly_key);
vnode->silly_key = NULL;
@@ -587,9 +805,10 @@ void afs_evict_inode(struct inode *inode)
int afs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
struct key *key;
- int ret;
+ int ret = -ENOMEM;
_enter("{%llx:%llu},{n=%pd},%x",
vnode->fid.vid, vnode->fid.vnode, dentry,
@@ -601,6 +820,10 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
return 0;
}
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL);
+ if (!scb)
+ goto error;
+
/* flush any dirty data outstanding on a regular file */
if (S_ISREG(vnode->vfs_inode.i_mode))
filemap_write_and_wait(vnode->vfs_inode.i_mapping);
@@ -611,25 +834,33 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
ret = PTR_ERR(key);
- goto error;
+ goto error_scb;
}
}
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, false)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
+ if (attr->ia_valid & ATTR_SIZE)
+ data_version++;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_setattr(&fc, attr);
+ afs_fs_setattr(&fc, attr, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
if (!(attr->ia_valid & ATTR_FILE))
key_put(key);
+error_scb:
+ kfree(scb);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b3cd6e8ad59d..2073c1a3ab4b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -66,6 +66,8 @@ struct afs_fs_context {
struct afs_iget_data {
struct afs_fid fid;
struct afs_volume *volume; /* volume on which resides */
+ unsigned int cb_v_break; /* Pre-fetch volume break count */
+ unsigned int cb_s_break; /* Pre-fetch server break count */
};
enum afs_call_state {
@@ -111,8 +113,12 @@ struct afs_call {
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
- struct afs_server *cm_server; /* Server affected by incoming CM call */
+ union {
+ struct afs_server *server;
+ struct afs_vlserver *vlserver;
+ };
struct afs_cb_interest *cbi; /* Callback interest for server used */
+ struct afs_vnode *lvnode; /* vnode being locked */
void *request; /* request data (first part) */
struct address_space *mapping; /* Pages being written from */
struct iov_iter iter; /* Buffer iterator */
@@ -122,7 +128,20 @@ struct afs_call {
struct bio_vec bvec[1];
};
void *buffer; /* reply receive buffer */
- void *reply[4]; /* Where to put the reply */
+ union {
+ long ret0; /* Value to reply with instead of 0 */
+ struct afs_addr_list *ret_alist;
+ struct afs_vldb_entry *ret_vldb;
+ struct afs_acl *ret_acl;
+ };
+ struct afs_fid *out_fid;
+ struct afs_status_cb *out_dir_scb;
+ struct afs_status_cb *out_scb;
+ struct yfs_acl *out_yacl;
+ struct afs_volsync *out_volsync;
+ struct afs_volume_status *out_volstatus;
+ struct afs_read *read_request;
+ unsigned int server_index;
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
atomic_t usage;
@@ -131,10 +150,10 @@ struct afs_call {
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
u32 epoch;
+ unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned first_offset; /* offset into mapping[first] */
- unsigned int cb_break; /* cb_break + cb_s_break before the call */
union {
unsigned last_to; /* amount of mapping[last] */
unsigned count2; /* count used in unmarshalling */
@@ -145,9 +164,9 @@ struct afs_call {
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
- bool ret_reply0; /* T if should return reply[0] on success */
bool upgrade; /* T to request service upgrade */
- bool want_reply_time; /* T if want reply_time */
+ bool have_reply_time; /* T if have got reply_time */
+ bool intr; /* T if interruptible */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
u32 operation_ID; /* operation ID for an incoming call */
@@ -159,8 +178,6 @@ struct afs_call {
} __attribute__((packed));
__be64 tmp64;
};
- afs_dataversion_t expected_version; /* Updated version expected from store */
- afs_dataversion_t expected_version_2; /* 2nd updated version expected from store */
ktime_t reply_time; /* Time of first reply packet */
};
@@ -221,7 +238,8 @@ struct afs_read {
unsigned int index; /* Which page we're reading into */
unsigned int nr_pages;
unsigned int offset; /* offset into current page */
- void (*page_done)(struct afs_call *, struct afs_read *);
+ struct afs_vnode *vnode;
+ void (*page_done)(struct afs_read *);
struct page **pages;
struct page *array[];
};
@@ -367,13 +385,13 @@ struct afs_cell {
time64_t last_inactive; /* Time of last drop of usage count */
atomic_t usage;
unsigned long flags;
-#define AFS_CELL_FL_NOT_READY 0 /* The cell record is not ready for use */
-#define AFS_CELL_FL_NO_GC 1 /* The cell was added manually, don't auto-gc */
-#define AFS_CELL_FL_NOT_FOUND 2 /* Permanent DNS error */
-#define AFS_CELL_FL_DNS_FAIL 3 /* Failed to access DNS */
-#define AFS_CELL_FL_NO_LOOKUP_YET 4 /* Not completed first DNS lookup yet */
+#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
+#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
enum afs_cell_state state;
short error;
+ enum dns_record_source dns_source:8; /* Latest source of data from lookup */
+ enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
+ unsigned int dns_lookup_count; /* Counter of DNS lookups */
/* Active fileserver interaction state. */
struct list_head proc_volumes; /* procfs volume list */
@@ -538,7 +556,10 @@ struct afs_server {
struct afs_vol_interest {
struct hlist_node srv_link; /* Link in server->cb_volumes */
struct hlist_head cb_interests; /* List of callback interests on the server */
- afs_volid_t vid; /* Volume ID to match */
+ union {
+ struct rcu_head rcu;
+ afs_volid_t vid; /* Volume ID to match */
+ };
unsigned int usage;
};
@@ -550,7 +571,10 @@ struct afs_cb_interest {
struct afs_vol_interest *vol_interest;
struct afs_server *server; /* Server on which this interest resides */
struct super_block *sb; /* Superblock on which inodes reside */
- afs_volid_t vid; /* Volume ID to match */
+ union {
+ struct rcu_head rcu;
+ afs_volid_t vid; /* Volume ID to match */
+ };
refcount_t usage;
};
@@ -660,15 +684,13 @@ struct afs_vnode {
afs_lock_type_t lock_type : 8;
/* outstanding callback notification on this file */
- struct afs_cb_interest *cb_interest; /* Server on which this resides */
+ struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */
unsigned int cb_s_break; /* Mass break counter on ->server */
unsigned int cb_v_break; /* Mass break counter on ->volume */
unsigned int cb_break; /* Break counter on vnode */
seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */
time64_t cb_expires_at; /* time at which callback expires */
- unsigned cb_version; /* callback version */
- afs_callback_type_t cb_type; /* type of callback */
};
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
@@ -755,6 +777,7 @@ struct afs_vl_cursor {
* Cursor for iterating over a set of fileservers.
*/
struct afs_fs_cursor {
+ const struct afs_call_type *type; /* Type of call done */
struct afs_addr_cursor ac;
struct afs_vnode *vnode;
struct afs_server_list *server_list; /* Current server list (pins ref) */
@@ -772,6 +795,7 @@ struct afs_fs_cursor {
#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */
#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */
#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+#define AFS_FS_CURSOR_INTR 0x0040 /* Set if op is interruptible */
unsigned short nr_iterations; /* Number of server iterations */
};
@@ -882,7 +906,6 @@ extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
extern void afs_d_release(struct dentry *);
-extern int afs_dir_remove_link(struct dentry *, struct key *, unsigned long, unsigned long);
/*
* dir_edit.c
@@ -940,50 +963,48 @@ extern int afs_flock(struct file *, int, struct file_lock *);
/*
* fsclient.c
*/
-#define AFS_VNODE_NOT_YET_SET 0x01
-#define AFS_VNODE_META_CHANGED 0x02
-#define AFS_VNODE_DATA_CHANGED 0x04
-extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *,
- const afs_dataversion_t *, u8);
-
-extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *,
+ struct afs_volsync *);
extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
-extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
-extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
-extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
- struct afs_fid *, struct afs_file_status *);
+extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *);
+extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool,
+ struct afs_status_cb *);
+extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
- struct afs_vnode *, const char *, u64, u64);
+ struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
- pgoff_t, pgoff_t, unsigned, unsigned);
-extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+ pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *);
+extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *);
extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
-extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
-extern int afs_fs_extend_lock(struct afs_fs_cursor *);
-extern int afs_fs_release_lock(struct afs_fs_cursor *);
+extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *);
+extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int afs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *);
extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *);
extern struct afs_call *afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
struct afs_addr_cursor *, struct key *,
unsigned int);
extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, unsigned int,
- struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ unsigned int, struct afs_volsync *);
extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ struct afs_volsync *);
struct afs_acl {
u32 size;
u8 data[];
};
-extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *);
-extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *);
+extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *,
+ struct afs_status_cb *);
/*
* fs_probe.c
@@ -995,15 +1016,20 @@ extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
/*
* inode.c
*/
-extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
+extern void afs_vnode_commit_status(struct afs_fs_cursor *,
+ struct afs_vnode *,
+ unsigned int,
+ const afs_dataversion_t *,
+ struct afs_status_cb *);
+extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_iget5_test(struct inode *, void *);
extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
extern struct inode *afs_iget(struct super_block *, struct key *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *,
+ struct afs_iget_data *, struct afs_status_cb *,
struct afs_cb_interest *,
struct afs_vnode *);
extern void afs_zap_data(struct afs_vnode *);
+extern bool afs_check_validity(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int afs_setattr(struct dentry *, struct iattr *);
@@ -1096,7 +1122,7 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {}
* rotate.c
*/
extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *,
- struct key *);
+ struct key *, bool);
extern bool afs_select_fileserver(struct afs_fs_cursor *);
extern bool afs_select_current_fileserver(struct afs_fs_cursor *);
extern int afs_end_vnode_operation(struct afs_fs_cursor *);
@@ -1121,6 +1147,12 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, bool);
extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
+static inline void afs_set_fc_call(struct afs_call *call, struct afs_fs_cursor *fc)
+{
+ call->intr = fc->flags & AFS_FS_CURSOR_INTR;
+ fc->type = call->type;
+}
+
static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
{
call->kvec[0].iov_base = buf;
@@ -1201,7 +1233,8 @@ static inline void afs_set_call_complete(struct afs_call *call,
*/
extern void afs_put_permits(struct afs_permits *);
extern void afs_clear_permits(struct afs_vnode *);
-extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
+ struct afs_status_cb *);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
@@ -1327,7 +1360,6 @@ extern int afs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata);
extern int afs_writepage(struct page *, struct writeback_control *);
extern int afs_writepages(struct address_space *, struct writeback_control *);
-extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
@@ -1343,33 +1375,36 @@ extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
/*
* yfsclient.c
*/
-extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
-extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
-extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
- struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
-extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
-extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
- struct afs_fid *, struct afs_file_status *);
-extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
- struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *,
+ struct afs_volsync *);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *,
+ struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *,
+ struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool,
+ struct afs_status_cb *);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+ struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *,
+ struct afs_status_cb *, struct afs_status_cb *);
extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
- pgoff_t, pgoff_t, unsigned, unsigned);
-extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+ pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *);
extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
-extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
-extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
-extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *);
extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ struct afs_volsync *);
extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
- struct afs_fid *, struct afs_file_status *,
- struct afs_callback *, unsigned int,
- struct afs_volsync *);
+ struct afs_fid *, struct afs_status_cb *,
+ unsigned int, struct afs_volsync *);
struct yfs_acl {
struct afs_acl *acl; /* Dir/file/symlink ACL */
@@ -1382,8 +1417,10 @@ struct yfs_acl {
};
extern void yfs_free_opaque_acl(struct yfs_acl *);
-extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, unsigned int);
-extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *);
+extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, struct yfs_acl *,
+ struct afs_status_cb *);
+extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *,
+ struct afs_status_cb *);
/*
* Miscellaneous inline functions.
@@ -1398,14 +1435,6 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
return &vnode->vfs_inode;
}
-static inline void afs_vnode_commit_status(struct afs_fs_cursor *fc,
- struct afs_vnode *vnode,
- unsigned int cb_break)
-{
- if (fc->ac.error == 0)
- afs_cache_permit(vnode, fc->key, cb_break);
-}
-
static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
struct afs_vnode *vnode)
{
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index be2ee3bbd0a9..371501d28e08 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -53,7 +53,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
seq_printf(m, "%3u %6lld %2u %s\n",
atomic_read(&cell->usage),
cell->dns_expiry - ktime_get_real_seconds(),
- vllist ? vllist->nr_servers : 0,
+ vllist->nr_servers,
cell->name);
return 0;
}
@@ -296,8 +296,8 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
if (v == SEQ_START_TOKEN) {
seq_printf(m, "# source %s, status %s\n",
- dns_record_sources[vllist->source],
- dns_lookup_statuses[vllist->status]);
+ dns_record_sources[vllist ? vllist->source : 0],
+ dns_lookup_statuses[vllist ? vllist->status : 0]);
return 0;
}
@@ -336,7 +336,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
if (pos == 0)
return SEQ_START_TOKEN;
- if (!vllist || pos - 1 >= vllist->nr_servers)
+ if (pos - 1 >= vllist->nr_servers)
return NULL;
return &vllist->servers[pos - 1];
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index c3ae324781f8..b00c739e0e63 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -25,7 +25,7 @@
* them here also using the io_lock.
*/
bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- struct key *key)
+ struct key *key, bool intr)
{
memset(fc, 0, sizeof(*fc));
fc->vnode = vnode;
@@ -33,10 +33,15 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
fc->ac.error = SHRT_MAX;
fc->error = -EDESTADDRREQ;
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
- fc->error = -EINTR;
- fc->flags |= AFS_FS_CURSOR_STOP;
- return false;
+ if (intr) {
+ fc->flags |= AFS_FS_CURSOR_INTR;
+ if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ fc->error = -EINTR;
+ fc->flags |= AFS_FS_CURSOR_STOP;
+ return false;
+ }
+ } else {
+ mutex_lock(&vnode->io_lock);
}
if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
@@ -61,7 +66,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
fc->untried = (1UL << fc->server_list->nr_servers) - 1;
fc->index = READ_ONCE(fc->server_list->preferred);
- cbi = vnode->cb_interest;
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
if (cbi) {
/* See if the vnode's preferred record is still available */
for (i = 0; i < fc->server_list->nr_servers; i++) {
@@ -82,8 +88,8 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
/* Note that the callback promise is effectively broken */
write_seqlock(&vnode->cb_lock);
- ASSERTCMP(cbi, ==, vnode->cb_interest);
- vnode->cb_interest = NULL;
+ ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest));
+ rcu_assign_pointer(vnode->cb_interest, NULL);
if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
@@ -118,10 +124,14 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
*/
static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
{
- msleep_interruptible(1000);
- if (signal_pending(current)) {
- fc->error = -ERESTARTSYS;
- return false;
+ if (fc->flags & AFS_FS_CURSOR_INTR) {
+ msleep_interruptible(1000);
+ if (signal_pending(current)) {
+ fc->error = -ERESTARTSYS;
+ return false;
+ }
+ } else {
+ msleep(1000);
}
return true;
@@ -408,7 +418,9 @@ selected_server:
if (error < 0)
goto failed_set_error;
- fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+ fc->cbi = afs_get_cb_interest(
+ rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock)));
read_lock(&server->fs_lock);
alist = rcu_dereference_protected(server->addresses,
@@ -459,6 +471,8 @@ no_more_servers:
s->probe.abort_code);
}
+ error = e.error;
+
failed_set_error:
fc->error = error;
failed:
@@ -476,12 +490,15 @@ failed:
bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
{
struct afs_vnode *vnode = fc->vnode;
- struct afs_cb_interest *cbi = vnode->cb_interest;
+ struct afs_cb_interest *cbi;
struct afs_addr_list *alist;
int error = fc->ac.error;
_enter("");
+ cbi = rcu_dereference_protected(vnode->cb_interest,
+ lockdep_is_held(&vnode->io_lock));
+
switch (error) {
case SHRT_MAX:
if (!cbi) {
@@ -490,7 +507,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
return false;
}
- fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+ fc->cbi = afs_get_cb_interest(cbi);
read_lock(&cbi->server->fs_lock);
alist = rcu_dereference_protected(cbi->server->addresses,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a34a89c75c6a..4fa5ce92b9b9 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -188,7 +188,7 @@ void afs_put_call(struct afs_call *call)
if (call->type->destructor)
call->type->destructor(call);
- afs_put_server(call->net, call->cm_server);
+ afs_put_server(call->net, call->server);
afs_put_cb_interest(call->net, call->cbi);
afs_put_addrlist(call->alist);
kfree(call->request);
@@ -417,6 +417,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
afs_wake_up_async_call :
afs_wake_up_call_waiter),
call->upgrade,
+ call->intr,
call->debug_id);
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -426,6 +427,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
call->rxcall = rxcall;
+ if (call->max_lifespan)
+ rxrpc_kernel_set_max_life(call->net->socket, rxcall,
+ call->max_lifespan);
+
/* send the request */
iov[0].iov_base = call->request;
iov[0].iov_len = call->request_size;
@@ -529,11 +534,11 @@ static void afs_deliver_to_call(struct afs_call *call)
return;
}
- if (call->want_reply_time &&
+ if (!call->have_reply_time &&
rxrpc_kernel_get_reply_time(call->net->socket,
call->rxcall,
&call->reply_time))
- call->want_reply_time = false;
+ call->have_reply_time = true;
ret = call->type->deliver(call);
state = READ_ONCE(call->state);
@@ -648,7 +653,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
break;
}
- if (timeout == 0 &&
+ if (call->intr && timeout == 0 &&
life == last_life && signal_pending(current)) {
if (stalled)
break;
@@ -691,10 +696,9 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
ret = ac->error;
switch (ret) {
case 0:
- if (call->ret_reply0) {
- ret = (long)call->reply[0];
- call->reply[0] = NULL;
- }
+ ret = call->ret0;
+ call->ret0 = 0;
+
/* Fall through */
case -ECONNABORTED:
ac->responded = true;
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 5f58a9a17e69..5d8ece98561e 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -87,11 +87,9 @@ void afs_clear_permits(struct afs_vnode *vnode)
permits = rcu_dereference_protected(vnode->permit_cache,
lockdep_is_held(&vnode->lock));
RCU_INIT_POINTER(vnode->permit_cache, NULL);
- vnode->cb_break++;
spin_unlock(&vnode->lock);
- if (permits)
- afs_put_permits(permits);
+ afs_put_permits(permits);
}
/*
@@ -118,10 +116,10 @@ static void afs_hash_permits(struct afs_permits *permits)
* as the ACL *may* have changed.
*/
void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
- unsigned int cb_break)
+ unsigned int cb_break, struct afs_status_cb *scb)
{
struct afs_permits *permits, *xpermits, *replacement, *zap, *new = NULL;
- afs_access_t caller_access = READ_ONCE(vnode->status.caller_access);
+ afs_access_t caller_access = scb->status.caller_access;
size_t size = 0;
bool changed = false;
int i, j;
@@ -148,7 +146,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
if (afs_cb_is_broken(cb_break, vnode,
- vnode->cb_interest)) {
+ rcu_dereference(vnode->cb_interest))) {
changed = true;
break;
}
@@ -178,7 +176,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
}
}
- if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
+ if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)))
goto someone_else_changed_it;
/* We need a ref on any permits list we want to copy as we'll have to
@@ -255,14 +253,16 @@ found:
kfree(new);
+ rcu_read_lock();
spin_lock(&vnode->lock);
zap = rcu_access_pointer(vnode->permit_cache);
- if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
+ if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) &&
zap == permits)
rcu_assign_pointer(vnode->permit_cache, replacement);
else
zap = replacement;
spin_unlock(&vnode->lock);
+ rcu_read_unlock();
afs_put_permits(zap);
out_put:
afs_put_permits(permits);
@@ -322,13 +322,12 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
*/
_debug("no valid permit");
- ret = afs_fetch_status(vnode, key, false);
+ ret = afs_fetch_status(vnode, key, false, _access);
if (ret < 0) {
*_access = 0;
_leave(" = %d", ret);
return ret;
}
- *_access = vnode->status.caller_access;
}
_leave(" = 0 [access %x]", *_access);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 65b33b6da48b..52c170b59cfd 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -521,8 +521,15 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a
alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
&server->uuid);
if (IS_ERR(alist)) {
- fc->ac.error = PTR_ERR(alist);
- _leave(" = f [%d]", fc->ac.error);
+ if ((PTR_ERR(alist) == -ERESTARTSYS ||
+ PTR_ERR(alist) == -EINTR) &&
+ !(fc->flags & AFS_FS_CURSOR_INTR) &&
+ server->addresses) {
+ _leave(" = t [intr]");
+ return true;
+ }
+ fc->error = PTR_ERR(alist);
+ _leave(" = f [%d]", fc->error);
return false;
}
@@ -574,7 +581,11 @@ retry:
ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
TASK_INTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
- fc->ac.error = ret;
+ if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
+ _leave(" = t [intr]");
+ return true;
+ }
+ fc->error = ret;
_leave(" = f [intr]");
return false;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 783c68cd1a35..f18911e8d770 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -426,7 +426,7 @@ static int afs_set_super(struct super_block *sb, struct fs_context *fc)
static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
{
struct afs_super_info *as = AFS_FS_S(sb);
- struct afs_fid fid;
+ struct afs_iget_data iget_data;
struct inode *inode = NULL;
int ret;
@@ -451,11 +451,13 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
} else {
sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
- fid.vid = as->volume->vid;
- fid.vnode = 1;
- fid.vnode_hi = 0;
- fid.unique = 1;
- inode = afs_iget(sb, ctx->key, &fid, NULL, NULL, NULL, NULL);
+ iget_data.fid.vid = as->volume->vid;
+ iget_data.fid.vnode = 1;
+ iget_data.fid.vnode_hi = 0;
+ iget_data.fid.unique = 1;
+ iget_data.cb_v_break = as->volume->cb_v_break;
+ iget_data.cb_s_break = 0;
+ inode = afs_iget(sb, ctx->key, &iget_data, NULL, NULL, NULL);
}
if (IS_ERR(inode))
@@ -677,13 +679,12 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
- vnode->cb_interest = NULL;
+ RCU_INIT_POINTER(vnode->cb_interest, NULL);
#ifdef CONFIG_AFS_FSCACHE
vnode->cache = NULL;
#endif
vnode->flags = 1 << AFS_VNODE_UNSET;
- vnode->cb_type = 0;
vnode->lock_state = AFS_VNODE_LOCK_NONE;
init_rwsem(&vnode->rmdir_lock);
@@ -708,7 +709,7 @@ static void afs_destroy_inode(struct inode *inode)
_debug("DESTROY INODE %p", inode);
- ASSERTCMP(vnode->cb_interest, ==, NULL);
+ ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL);
atomic_dec(&afs_count_active_inodes);
}
@@ -741,7 +742,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
return PTR_ERR(key);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
fc.flags |= AFS_FS_CURSOR_NO_VSLEEP;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
@@ -749,7 +750,6 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
ret = afs_end_vnode_operation(&fc);
}
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index b4f1a84519b9..61e25010ff33 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -232,18 +232,16 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
if (bs.status > NR__dns_lookup_status)
bs.status = NR__dns_lookup_status;
+ /* See if we can update an old server record */
server = NULL;
- if (previous) {
- /* See if we can update an old server record */
- for (i = 0; i < previous->nr_servers; i++) {
- struct afs_vlserver *p = previous->servers[i].server;
-
- if (p->name_len == bs.name_len &&
- p->port == bs.port &&
- strncasecmp(b, p->name, bs.name_len) == 0) {
- server = afs_get_vlserver(p);
- break;
- }
+ for (i = 0; i < previous->nr_servers; i++) {
+ struct afs_vlserver *p = previous->servers[i].server;
+
+ if (p->name_len == bs.name_len &&
+ p->port == bs.port &&
+ strncasecmp(b, p->name, bs.name_len) == 0) {
+ server = afs_get_vlserver(p);
+ break;
}
}
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index b05e0de04f42..beb991563939 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -33,8 +33,8 @@ static bool afs_vl_probe_done(struct afs_vlserver *server)
void afs_vlserver_probe_result(struct afs_call *call)
{
struct afs_addr_list *alist = call->alist;
- struct afs_vlserver *server = call->reply[0];
- unsigned int server_index = (long)call->reply[1];
+ struct afs_vlserver *server = call->vlserver;
+ unsigned int server_index = call->server_index;
unsigned int index = call->addr_ix;
unsigned int rtt = UINT_MAX;
bool have_result = false;
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 7adde83a0648..3f845489a9f0 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -43,11 +43,29 @@ bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cel
static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
{
struct afs_cell *cell = vc->cell;
+ unsigned int dns_lookup_count;
+
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE ||
+ cell->dns_expiry <= ktime_get_real_seconds()) {
+ dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
+ set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
+ queue_work(afs_wq, &cell->manager);
+
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+ if (wait_var_event_interruptible(
+ &cell->dns_lookup_count,
+ smp_load_acquire(&cell->dns_lookup_count)
+ != dns_lookup_count) < 0) {
+ vc->error = -ERESTARTSYS;
+ return false;
+ }
+ }
- if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
- TASK_INTERRUPTIBLE)) {
- vc->error = -ERESTARTSYS;
- return false;
+ /* Status load is ordered after lookup counter load */
+ if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
+ vc->error = -EDESTADDRREQ;
+ return false;
+ }
}
read_lock(&cell->vl_servers_lock);
@@ -55,7 +73,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
rcu_dereference_protected(cell->vl_servers,
lockdep_is_held(&cell->vl_servers_lock)));
read_unlock(&cell->vl_servers_lock);
- if (!vc->server_list || !vc->server_list->nr_servers)
+ if (!vc->server_list->nr_servers)
return false;
vc->untried = (1UL << vc->server_list->nr_servers) - 1;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dd9ba4e96fb3..3d4b9836a2e2 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -34,7 +34,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
uvldb = call->buffer;
- entry = call->reply[0];
+ entry = call->ret_vldb;
nr_servers = ntohl(uvldb->nServers);
if (nr_servers > AFS_NMAXNSERVERS)
@@ -110,7 +110,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
static void afs_destroy_vl_get_entry_by_name_u(struct afs_call *call)
{
- kfree(call->reply[0]);
+ kfree(call->ret_vldb);
afs_flat_call_destructor(call);
}
@@ -155,8 +155,8 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
}
call->key = vc->key;
- call->reply[0] = entry;
- call->ret_reply0 = true;
+ call->ret_vldb = entry;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
@@ -214,7 +214,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
- call->reply[0] = alist;
+ call->ret_alist = alist;
call->count = count;
call->count2 = nentries;
call->unmarshall++;
@@ -229,7 +229,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
if (ret < 0)
return ret;
- alist = call->reply[0];
+ alist = call->ret_alist;
bp = call->buffer;
count = min(call->count, 4U);
for (i = 0; i < count; i++)
@@ -249,8 +249,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
static void afs_vl_get_addrs_u_destructor(struct afs_call *call)
{
- afs_put_server(call->net, (struct afs_server *)call->reply[0]);
- kfree(call->reply[1]);
+ afs_put_addrlist(call->ret_alist);
return afs_flat_call_destructor(call);
}
@@ -287,8 +286,8 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
return ERR_PTR(-ENOMEM);
call->key = vc->key;
- call->reply[0] = NULL;
- call->ret_reply0 = true;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
@@ -358,9 +357,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
static void afs_destroy_vl_get_capabilities(struct afs_call *call)
{
- struct afs_vlserver *server = call->reply[0];
-
- afs_put_vlserver(call->net, server);
+ afs_put_vlserver(call->net, call->vlserver);
afs_flat_call_destructor(call);
}
@@ -398,11 +395,11 @@ struct afs_call *afs_vl_get_capabilities(struct afs_net *net,
return ERR_PTR(-ENOMEM);
call->key = key;
- call->reply[0] = afs_get_vlserver(server);
- call->reply[1] = (void *)(long)server_index;
+ call->vlserver = afs_get_vlserver(server);
+ call->server_index = server_index;
call->upgrade = true;
- call->want_reply_time = true;
call->async = true;
+ call->max_lifespan = AFS_PROBE_MAX_LIFESPAN;
/* marshall the parameters */
bp = call->request;
@@ -460,7 +457,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (!alist)
return -ENOMEM;
alist->version = uniquifier;
- call->reply[0] = alist;
+ call->ret_alist = alist;
if (call->count == 0)
goto extract_volendpoints;
@@ -488,7 +485,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (ret < 0)
return ret;
- alist = call->reply[0];
+ alist = call->ret_alist;
bp = call->buffer;
switch (call->count2) {
case YFS_ENDPOINT_IPV4:
@@ -609,7 +606,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
break;
}
- alist = call->reply[0];
_leave(" = 0 [done]");
return 0;
}
@@ -644,8 +640,8 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
return ERR_PTR(-ENOMEM);
call->key = vc->key;
- call->reply[0] = NULL;
- call->ret_reply0 = true;
+ call->ret_alist = NULL;
+ call->max_lifespan = AFS_VL_MAX_LIFESPAN;
/* Marshall the parameters */
bp = call->request;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 0122d7445fba..8bcab95f1127 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -314,6 +314,46 @@ static void afs_redirty_pages(struct writeback_control *wbc,
}
/*
+ * completion of write to server
+ */
+static void afs_pages_written_back(struct afs_vnode *vnode,
+ pgoff_t first, pgoff_t last)
+{
+ struct pagevec pv;
+ unsigned long priv;
+ unsigned count, loop;
+
+ _enter("{%llx:%llu},{%lx-%lx}",
+ vnode->fid.vid, vnode->fid.vnode, first, last);
+
+ pagevec_init(&pv);
+
+ do {
+ _debug("done %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > PAGEVEC_SIZE)
+ count = PAGEVEC_SIZE;
+ pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
+ first, count, pv.pages);
+ ASSERTCMP(pv.nr, ==, count);
+
+ for (loop = 0; loop < count; loop++) {
+ priv = page_private(pv.pages[loop]);
+ trace_afs_page_dirty(vnode, tracepoint_string("clear"),
+ pv.pages[loop]->index, priv);
+ set_page_private(pv.pages[loop], 0);
+ end_page_writeback(pv.pages[loop]);
+ }
+ first += count;
+ __pagevec_release(&pv);
+ } while (first <= last);
+
+ afs_prune_wb_keys(vnode);
+ _leave("");
+}
+
+/*
* write to a file
*/
static int afs_store_data(struct address_space *mapping,
@@ -322,6 +362,7 @@ static int afs_store_data(struct address_space *mapping,
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_wb_key *wbk = NULL;
struct list_head *p;
int ret = -ENOKEY, ret2;
@@ -333,6 +374,10 @@ static int afs_store_data(struct address_space *mapping,
vnode->fid.unique,
first, last, offset, to);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ return -ENOMEM;
+
spin_lock(&vnode->wb_lock);
p = vnode->wb_keys.next;
@@ -351,6 +396,7 @@ try_next_key:
spin_unlock(&vnode->wb_lock);
afs_put_wb_key(wbk);
+ kfree(scb);
_leave(" = %d [no keys]", ret);
return ret;
@@ -361,14 +407,19 @@ found_key:
_debug("USE WB KEY %u", key_serial(wbk->key));
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, wbk->key, false)) {
+ afs_dataversion_t data_version = vnode->status.data_version + 1;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_store_data(&fc, mapping, first, last, offset, to);
+ afs_fs_store_data(&fc, mapping, first, last, offset, to, scb);
}
- afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_check_for_remote_deletion(&fc, vnode);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
+ if (fc.ac.error == 0)
+ afs_pages_written_back(vnode, first, last);
ret = afs_end_vnode_operation(&fc);
}
@@ -393,6 +444,7 @@ found_key:
}
afs_put_wb_key(wbk);
+ kfree(scb);
_leave(" = %d", ret);
return ret;
}
@@ -679,46 +731,6 @@ int afs_writepages(struct address_space *mapping,
}
/*
- * completion of write to server
- */
-void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
-{
- struct pagevec pv;
- unsigned long priv;
- unsigned count, loop;
- pgoff_t first = call->first, last = call->last;
-
- _enter("{%llx:%llu},{%lx-%lx}",
- vnode->fid.vid, vnode->fid.vnode, first, last);
-
- pagevec_init(&pv);
-
- do {
- _debug("done %lx-%lx", first, last);
-
- count = last - first + 1;
- if (count > PAGEVEC_SIZE)
- count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
- first, count, pv.pages);
- ASSERTCMP(pv.nr, ==, count);
-
- for (loop = 0; loop < count; loop++) {
- priv = page_private(pv.pages[loop]);
- trace_afs_page_dirty(vnode, tracepoint_string("clear"),
- pv.pages[loop]->index, priv);
- set_page_private(pv.pages[loop], 0);
- end_page_writeback(pv.pages[loop]);
- }
- first += count;
- __pagevec_release(&pv);
- } while (first <= last);
-
- afs_prune_wb_keys(vnode);
- _leave("");
-}
-
-/*
* write to an AFS file
*/
ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index c81f85003fc7..17f58fea7ec1 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -47,40 +47,52 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler,
void *buffer, size_t size)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
+
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_scb;
+ }
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- acl = afs_fs_fetch_acl(&fc);
+ acl = afs_fs_fetch_acl(&fc, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
if (ret == 0) {
ret = acl->size;
if (size > 0) {
- ret = -ERANGE;
- if (acl->size > size)
- return -ERANGE;
- memcpy(buffer, acl->data, acl->size);
- ret = acl->size;
+ if (acl->size <= size)
+ memcpy(buffer, acl->data, acl->size);
+ else
+ ret = -ERANGE;
}
kfree(acl);
}
key_put(key);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
@@ -93,41 +105,53 @@ static int afs_xattr_set_acl(const struct xattr_handler *handler,
const void *buffer, size_t size, int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
if (flags == XATTR_CREATE)
return -EINVAL;
- key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
- if (!acl) {
- key_put(key);
- return -ENOMEM;
+ if (!acl)
+ goto error_scb;
+
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_acl;
}
acl->size = size;
memcpy(acl->data, buffer, size);
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- afs_fs_store_acl(&fc, acl);
+ afs_fs_store_acl(&fc, acl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
- kfree(acl);
key_put(key);
+error_acl:
+ kfree(acl);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
@@ -146,12 +170,12 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler,
void *buffer, size_t size)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct yfs_acl *yacl = NULL;
struct key *key;
- unsigned int flags = 0;
char buf[16], *data;
- int which = 0, dsize, ret;
+ int which = 0, dsize, ret = -ENOMEM;
if (strcmp(name, "acl") == 0)
which = 0;
@@ -164,65 +188,81 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler,
else
return -EOPNOTSUPP;
+ yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
+ if (!yacl)
+ goto error;
+
if (which == 0)
- flags |= YFS_ACL_WANT_ACL;
+ yacl->flags |= YFS_ACL_WANT_ACL;
else if (which == 3)
- flags |= YFS_ACL_WANT_VOL_ACL;
+ yacl->flags |= YFS_ACL_WANT_VOL_ACL;
+
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error_yacl;
key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_scb;
+ }
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- yacl = yfs_fs_fetch_opaque_acl(&fc, flags);
+ yfs_fs_fetch_opaque_acl(&fc, yacl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
- if (ret == 0) {
- switch (which) {
- case 0:
- data = yacl->acl->data;
- dsize = yacl->acl->size;
- break;
- case 1:
- data = buf;
- dsize = snprintf(buf, sizeof(buf), "%u",
- yacl->inherit_flag);
- break;
- case 2:
- data = buf;
- dsize = snprintf(buf, sizeof(buf), "%u",
- yacl->num_cleaned);
- break;
- case 3:
- data = yacl->vol_acl->data;
- dsize = yacl->vol_acl->size;
- break;
- default:
- ret = -EOPNOTSUPP;
- goto out;
- }
+ if (ret < 0)
+ goto error_key;
+
+ switch (which) {
+ case 0:
+ data = yacl->acl->data;
+ dsize = yacl->acl->size;
+ break;
+ case 1:
+ data = buf;
+ dsize = snprintf(buf, sizeof(buf), "%u", yacl->inherit_flag);
+ break;
+ case 2:
+ data = buf;
+ dsize = snprintf(buf, sizeof(buf), "%u", yacl->num_cleaned);
+ break;
+ case 3:
+ data = yacl->vol_acl->data;
+ dsize = yacl->vol_acl->size;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto error_key;
+ }
- ret = dsize;
- if (size > 0) {
- if (dsize > size) {
- ret = -ERANGE;
- goto out;
- }
- memcpy(buffer, data, dsize);
+ ret = dsize;
+ if (size > 0) {
+ if (dsize > size) {
+ ret = -ERANGE;
+ goto error_key;
}
+ memcpy(buffer, data, dsize);
}
-out:
- yfs_free_opaque_acl(yacl);
+error_key:
key_put(key);
+error_scb:
+ kfree(scb);
+error_yacl:
+ yfs_free_opaque_acl(yacl);
+error:
return ret;
}
@@ -235,42 +275,54 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler,
const void *buffer, size_t size, int flags)
{
struct afs_fs_cursor fc;
+ struct afs_status_cb *scb;
struct afs_vnode *vnode = AFS_FS_I(inode);
struct afs_acl *acl = NULL;
struct key *key;
- int ret;
+ int ret = -ENOMEM;
if (flags == XATTR_CREATE ||
strcmp(name, "acl") != 0)
return -EINVAL;
- key = afs_request_key(vnode->volume->cell);
- if (IS_ERR(key))
- return PTR_ERR(key);
+ scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS);
+ if (!scb)
+ goto error;
acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL);
- if (!acl) {
- key_put(key);
- return -ENOMEM;
- }
+ if (!acl)
+ goto error_scb;
acl->size = size;
memcpy(acl->data, buffer, size);
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error_acl;
+ }
+
ret = -ERESTARTSYS;
- if (afs_begin_vnode_operation(&fc, vnode, key)) {
+ if (afs_begin_vnode_operation(&fc, vnode, key, true)) {
+ afs_dataversion_t data_version = vnode->status.data_version;
+
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(vnode);
- yfs_fs_store_opaque_acl2(&fc, acl);
+ yfs_fs_store_opaque_acl2(&fc, acl, scb);
}
afs_check_for_remote_deletion(&fc, fc.vnode);
- afs_vnode_commit_status(&fc, vnode, fc.cb_break);
+ afs_vnode_commit_status(&fc, vnode, fc.cb_break,
+ &data_version, scb);
ret = afs_end_vnode_operation(&fc);
}
- kfree(acl);
key_put(key);
+error_acl:
+ kfree(acl);
+error_scb:
+ kfree(scb);
+error:
return ret;
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 6cf7d161baa1..10de675dc6fc 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -183,24 +183,19 @@ static void xdr_dump_bad(const __be32 *bp)
/*
* Decode a YFSFetchStatus block
*/
-static int xdr_decode_YFSFetchStatus(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
+static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+ struct afs_file_status *status = &scb->status;
u32 type;
- u8 flags = 0;
status->abort_code = ntohl(xdr->abort_code);
if (status->abort_code != 0) {
- if (vnode && status->abort_code == VNOVNODE) {
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ if (status->abort_code == VNOVNODE)
status->nlink = 0;
- __afs_break_callback(vnode);
- }
+ scb->have_error = true;
return 0;
}
@@ -209,77 +204,28 @@ static int xdr_decode_YFSFetchStatus(struct afs_call *call,
case AFS_FTYPE_FILE:
case AFS_FTYPE_DIR:
case AFS_FTYPE_SYMLINK:
- if (type != status->type &&
- vnode &&
- !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
- pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
- vnode->fid.vid,
- vnode->fid.vnode,
- vnode->fid.unique,
- status->type, type);
- goto bad;
- }
status->type = type;
break;
default:
goto bad;
}
-#define EXTRACT_M4(FIELD) \
- do { \
- u32 x = ntohl(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
-#define EXTRACT_M8(FIELD) \
- do { \
- u64 x = xdr_to_u64(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_META_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
-#define EXTRACT_D8(FIELD) \
- do { \
- u64 x = xdr_to_u64(xdr->FIELD); \
- if (status->FIELD != x) { \
- flags |= AFS_VNODE_DATA_CHANGED; \
- status->FIELD = x; \
- } \
- } while (0)
-
- EXTRACT_M4(nlink);
- EXTRACT_D8(size);
- EXTRACT_D8(data_version);
- EXTRACT_M8(author);
- EXTRACT_M8(owner);
- EXTRACT_M8(group);
- EXTRACT_M4(mode);
- EXTRACT_M4(caller_access); /* call ticket dependent */
- EXTRACT_M4(anon_access);
-
- status->mtime_client = xdr_to_time(xdr->mtime_client);
- status->mtime_server = xdr_to_time(xdr->mtime_server);
- status->lock_count = ntohl(xdr->lock_count);
-
- if (read_req) {
- read_req->data_version = status->data_version;
- read_req->file_size = status->size;
- }
+ status->nlink = ntohl(xdr->nlink);
+ status->author = xdr_to_u64(xdr->author);
+ status->owner = xdr_to_u64(xdr->owner);
+ status->caller_access = ntohl(xdr->caller_access); /* Ticket dependent */
+ status->anon_access = ntohl(xdr->anon_access);
+ status->mode = ntohl(xdr->mode) & S_IALLUGO;
+ status->group = xdr_to_u64(xdr->group);
+ status->lock_count = ntohl(xdr->lock_count);
+
+ status->mtime_client = xdr_to_time(xdr->mtime_client);
+ status->mtime_server = xdr_to_time(xdr->mtime_server);
+ status->size = xdr_to_u64(xdr->size);
+ status->data_version = xdr_to_u64(xdr->data_version);
+ scb->have_status = true;
*_bp += xdr_size(xdr);
-
- if (vnode) {
- if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
- flags |= AFS_VNODE_NOT_YET_SET;
- afs_update_inode_from_status(vnode, status, expected_version,
- flags);
- }
-
return 0;
bad:
@@ -288,73 +234,20 @@ bad:
}
/*
- * Decode the file status. We need to lock the target vnode if we're going to
- * update its status so that stat() sees the attributes update atomically.
- */
-static int yfs_decode_status(struct afs_call *call,
- const __be32 **_bp,
- struct afs_file_status *status,
- struct afs_vnode *vnode,
- const afs_dataversion_t *expected_version,
- struct afs_read *read_req)
-{
- int ret;
-
- if (!vnode)
- return xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
-
- write_seqlock(&vnode->cb_lock);
- ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
- expected_version, read_req);
- write_sequnlock(&vnode->cb_lock);
- return ret;
-}
-
-/*
* Decode a YFSCallBack block
*/
-static void xdr_decode_YFSCallBack(struct afs_call *call,
- struct afs_vnode *vnode,
- const __be32 **_bp)
-{
- struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp;
- struct afs_cb_interest *old, *cbi = call->cbi;
- u64 cb_expiry;
-
- write_seqlock(&vnode->cb_lock);
-
- if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
- cb_expiry = xdr_to_u64(xdr->expiration_time);
- do_div(cb_expiry, 10 * 1000 * 1000);
- vnode->cb_version = ntohl(xdr->version);
- vnode->cb_type = ntohl(xdr->type);
- vnode->cb_expires_at = cb_expiry + ktime_get_real_seconds();
- old = vnode->cb_interest;
- if (old != call->cbi) {
- vnode->cb_interest = cbi;
- cbi = old;
- }
- set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
- }
-
- write_sequnlock(&vnode->cb_lock);
- call->cbi = cbi;
- *_bp += xdr_size(xdr);
-}
-
-static void xdr_decode_YFSCallBack_raw(const __be32 **_bp,
- struct afs_callback *cb)
+static void xdr_decode_YFSCallBack(const __be32 **_bp,
+ struct afs_call *call,
+ struct afs_status_cb *scb)
{
struct yfs_xdr_YFSCallBack *x = (void *)*_bp;
- u64 cb_expiry;
-
- cb_expiry = xdr_to_u64(x->expiration_time);
- do_div(cb_expiry, 10 * 1000 * 1000);
- cb->version = ntohl(x->version);
- cb->type = ntohl(x->type);
- cb->expires_at = cb_expiry + ktime_get_real_seconds();
+ struct afs_callback *cb = &scb->callback;
+ ktime_t cb_expiry;
+ cb_expiry = call->reply_time;
+ cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100);
+ cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC);
+ scb->have_cb = true;
*_bp += xdr_size(x);
}
@@ -442,11 +335,10 @@ static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
}
/*
- * deliver reply data to an FS.FetchStatus
+ * Deliver a reply that's a status, callback and volsync.
*/
-static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
+static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -454,16 +346,36 @@ static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
if (ret < 0)
return ret;
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- xdr_decode_YFSVolSync(&bp, call->reply[1]);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * Deliver reply data to operations that just return a file status and a volume
+ * sync record.
+ */
+static int yfs_deliver_status_and_volsync(struct afs_call *call)
+{
+ const __be32 *bp;
+ int ret;
+
+ ret = afs_transfer_reply(call);
+ if (ret < 0)
+ return ret;
+
+ bp = call->buffer;
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -475,15 +387,15 @@ static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
.name = "YFS.FetchStatus(vnode)",
.op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_fetch_status_vnode,
+ .deliver = yfs_deliver_fs_status_cb_and_volsync,
.destructor = afs_flat_call_destructor,
};
/*
* Fetch the status information for a file.
*/
-int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
- bool new_inode)
+int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_volsync *volsync)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -505,9 +417,8 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = volsync;
- call->expected_version = new_inode ? 1 : vnode->status.data_version;
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
@@ -516,9 +427,9 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
bp = xdr_encode_YFSFid(bp, &vnode->fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -528,8 +439,7 @@ int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
*/
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
- struct afs_read *req = call->reply[2];
+ struct afs_read *req = call->read_request;
const __be32 *bp;
unsigned int size;
int ret;
@@ -586,7 +496,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
if (req->offset == PAGE_SIZE) {
req->offset = 0;
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->index++;
if (req->remain > 0)
goto begin_page;
@@ -623,12 +533,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
return ret;
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &vnode->status.data_version, req);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- xdr_decode_YFSVolSync(&bp, call->reply[1]);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
+
+ req->data_version = call->out_scb->status.data_version;
+ req->file_size = call->out_scb->status.size;
call->unmarshall++;
@@ -642,7 +554,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
zero_user_segment(req->pages[req->index],
req->offset, PAGE_SIZE);
if (req->page_done)
- req->page_done(call, req);
+ req->page_done(req);
req->offset = 0;
}
@@ -652,9 +564,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
static void yfs_fetch_data_destructor(struct afs_call *call)
{
- struct afs_read *req = call->reply[2];
-
- afs_put_read(req);
+ afs_put_read(call->read_request);
afs_flat_call_destructor(call);
}
@@ -671,7 +581,8 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = {
/*
* Fetch data from a file.
*/
-int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb,
+ struct afs_read *req)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -693,11 +604,9 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = NULL; /* volsync */
- call->reply[2] = req;
- call->expected_version = vnode->status.data_version;
- call->want_reply_time = true;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
+ call->read_request = req;
/* marshall the parameters */
bp = call->request;
@@ -709,9 +618,9 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
yfs_check_req(call, bp);
refcount_inc(&req->usage);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -721,7 +630,6 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
*/
static int yfs_deliver_fs_create_vnode(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -733,16 +641,15 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_YFSFid(&bp, call->reply[1]);
- ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_YFSFid(&bp, call->out_fid);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSCallBack_raw(&bp, call->reply[3]);
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSCallBack(&bp, call, call->out_scb);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -764,14 +671,13 @@ static const struct afs_call_type afs_RXFSCreateFile = {
int yfs_fs_create_file(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, rplsz;
__be32 *bp;
@@ -795,24 +701,23 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSCREATEFILE);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
- bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFSFid(bp, &dvnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -830,14 +735,13 @@ static const struct afs_call_type yfs_RXFSMakeDir = {
int yfs_fs_make_dir(struct afs_fs_cursor *fc,
const char *name,
umode_t mode,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus,
- struct afs_callback *newcb)
+ struct afs_status_cb *new_scb)
{
- struct afs_vnode *vnode = fc->vnode;
+ struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
- struct afs_net *net = afs_v2net(vnode);
+ struct afs_net *net = afs_v2net(dvnode);
size_t namesz, reqsz, rplsz;
__be32 *bp;
@@ -860,23 +764,22 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->reply[3] = newcb;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = new_scb;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSMAKEDIR);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
- bp = xdr_encode_YFSFid(bp, &vnode->fid);
+ bp = xdr_encode_YFSFid(bp, &dvnode->fid);
bp = xdr_encode_string(bp, name, namesz);
bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
yfs_check_req(call, bp);
afs_use_fs_server(call, fc->cbi);
- trace_afs_make_fs_call1(call, &vnode->fid, name);
+ trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -886,8 +789,6 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc,
*/
static int yfs_deliver_fs_remove_file2(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0];
- struct afs_vnode *vnode = call->reply[1];
struct afs_fid fid;
const __be32 *bp;
int ret;
@@ -898,20 +799,18 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call)
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
xdr_decode_YFSFid(&bp, &fid);
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
/* Was deleted if the status decoded into call->out_scb has abort_code == VNOVNODE. */
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
return 0;
}
@@ -929,7 +828,8 @@ static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
* Remove a file and retrieve new file status.
*/
int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name, struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -954,9 +854,8 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -968,6 +867,7 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -977,7 +877,6 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
*/
static int yfs_deliver_fs_remove(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -987,14 +886,12 @@ static int yfs_deliver_fs_remove(struct afs_call *call)
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
return 0;
}
@@ -1019,7 +916,8 @@ static const struct afs_call_type yfs_RXYFSRemoveDir = {
* remove a file or directory
*/
int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, bool isdir, u64 current_data_version)
+ const char *name, bool isdir,
+ struct afs_status_cb *dvnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -1042,9 +940,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1056,6 +952,7 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1065,7 +962,6 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
*/
static int yfs_deliver_fs_link(struct afs_call *call)
{
- struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
const __be32 *bp;
int ret;
@@ -1075,16 +971,14 @@ static int yfs_deliver_fs_link(struct afs_call *call)
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
}
@@ -1103,7 +997,9 @@ static const struct afs_call_type yfs_RXYFSLink = {
* Make a hard link.
*/
int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
- const char *name, u64 current_data_version)
+ const char *name,
+ struct afs_status_cb *dvnode_scb,
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -1127,9 +1023,8 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = vnode;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1142,6 +1037,7 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &vnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1151,7 +1047,6 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
*/
static int yfs_deliver_fs_symlink(struct afs_call *call)
{
- struct afs_vnode *vnode = call->reply[0];
const __be32 *bp;
int ret;
@@ -1163,15 +1058,14 @@ static int yfs_deliver_fs_symlink(struct afs_call *call)
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_YFSFid(&bp, call->reply[1]);
- ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+ xdr_decode_YFSFid(&bp, call->out_fid);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
@@ -1193,9 +1087,9 @@ static const struct afs_call_type yfs_RXYFSSymlink = {
int yfs_fs_symlink(struct afs_fs_cursor *fc,
const char *name,
const char *contents,
- u64 current_data_version,
+ struct afs_status_cb *dvnode_scb,
struct afs_fid *newfid,
- struct afs_file_status *newstatus)
+ struct afs_status_cb *vnode_scb)
{
struct afs_vnode *dvnode = fc->vnode;
struct afs_call *call;
@@ -1222,10 +1116,9 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = dvnode;
- call->reply[1] = newfid;
- call->reply[2] = newstatus;
- call->expected_version = current_data_version + 1;
+ call->out_dir_scb = dvnode_scb;
+ call->out_fid = newfid;
+ call->out_scb = vnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1239,6 +1132,7 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call1(call, &dvnode->fid, name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1248,8 +1142,6 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc,
*/
static int yfs_deliver_fs_rename(struct afs_call *call)
{
- struct afs_vnode *orig_dvnode = call->reply[0];
- struct afs_vnode *new_dvnode = call->reply[1];
const __be32 *bp;
int ret;
@@ -1259,20 +1151,17 @@ static int yfs_deliver_fs_rename(struct afs_call *call)
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (new_dvnode != orig_dvnode) {
- ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
- &call->expected_version_2, NULL);
+ if (call->out_dir_scb != call->out_scb) {
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
}
- xdr_decode_YFSVolSync(&bp, NULL);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
return 0;
}
@@ -1294,8 +1183,8 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
const char *orig_name,
struct afs_vnode *new_dvnode,
const char *new_name,
- u64 current_orig_data_version,
- u64 current_new_data_version)
+ struct afs_status_cb *orig_dvnode_scb,
+ struct afs_status_cb *new_dvnode_scb)
{
struct afs_vnode *orig_dvnode = fc->vnode;
struct afs_call *call;
@@ -1321,10 +1210,8 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = orig_dvnode;
- call->reply[1] = new_dvnode;
- call->expected_version = current_orig_data_version + 1;
- call->expected_version_2 = current_new_data_version + 1;
+ call->out_dir_scb = orig_dvnode_scb;
+ call->out_scb = new_dvnode_scb;
/* marshall the parameters */
bp = call->request;
@@ -1338,46 +1225,18 @@ int yfs_fs_rename(struct afs_fs_cursor *fc,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * Deliver reply data to a YFS.StoreData64 operation.
- */
-static int yfs_deliver_fs_store_data(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("");
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- afs_pages_written_back(vnode, call);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
-/*
* YFS.StoreData64 operation type.
*/
static const struct afs_call_type yfs_RXYFSStoreData64 = {
.name = "YFS.StoreData64",
.op = yfs_FS_StoreData64,
- .deliver = yfs_deliver_fs_store_data,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
@@ -1386,7 +1245,8 @@ static const struct afs_call_type yfs_RXYFSStoreData64 = {
*/
int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to)
+ unsigned offset, unsigned to,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1424,13 +1284,12 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
call->key = fc->key;
call->mapping = mapping;
- call->reply[0] = vnode;
call->first = first;
call->last = last;
call->first_offset = offset;
call->last_to = to;
call->send_pages = true;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1445,51 +1304,25 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * deliver reply data to an FS.StoreStatus
- */
-static int yfs_deliver_fs_store_status(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("");
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
-/*
* YFS.StoreStatus operation type
*/
static const struct afs_call_type yfs_RXYFSStoreStatus = {
.name = "YFS.StoreStatus",
.op = yfs_FS_StoreStatus,
- .deliver = yfs_deliver_fs_store_status,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
.name = "YFS.StoreData64",
.op = yfs_FS_StoreData64,
- .deliver = yfs_deliver_fs_store_status,
+ .deliver = yfs_deliver_status_and_volsync,
.destructor = afs_flat_call_destructor,
};
@@ -1497,7 +1330,8 @@ static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
* Set the attributes on a file, using YFS.StoreData64 rather than
* YFS.StoreStatus so as to alter the file size also.
*/
-static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1518,8 +1352,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version + 1;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1534,6 +1367,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1542,7 +1376,8 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
* Set the attributes on a file, using YFS.StoreData64 if there's a change in
* file size, and YFS.StoreStatus otherwise.
*/
-int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1550,7 +1385,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
__be32 *bp;
if (attr->ia_valid & ATTR_SIZE)
- return yfs_fs_setattr_size(fc, attr);
+ return yfs_fs_setattr_size(fc, attr, scb);
_enter(",%x,{%llx:%llu},,",
key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
@@ -1565,8 +1400,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->expected_version = vnode->status.data_version;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1578,6 +1412,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1607,7 +1442,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]);
+ xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
@@ -1623,7 +1458,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_volname_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the volume name */
@@ -1633,7 +1468,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
@@ -1651,7 +1486,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_offline_msg_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the offline message */
@@ -1661,7 +1496,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("offline '%s'", p);
@@ -1680,7 +1515,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
return afs_protocol_error(call, -EBADMSG,
afs_eproto_motd_len);
size = (call->count + 3) & ~3; /* It's padded */
- afs_extract_begin(call, call->reply[2], size);
+ afs_extract_to_buf(call, size);
call->unmarshall++;
/* Fall through - and extract the message of the day */
@@ -1690,7 +1525,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
if (ret < 0)
return ret;
- p = call->reply[2];
+ p = call->buffer;
p[call->count] = 0;
_debug("motd '%s'", p);
@@ -1706,23 +1541,13 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
}
/*
- * Destroy a YFS.GetVolumeStatus call.
- */
-static void yfs_get_volume_status_call_destructor(struct afs_call *call)
-{
- kfree(call->reply[2]);
- call->reply[2] = NULL;
- afs_flat_call_destructor(call);
-}
-
-/*
* YFS.GetVolumeStatus operation type
*/
static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
.name = "YFS.GetVolumeStatus",
.op = yfs_FS_GetVolumeStatus,
.deliver = yfs_deliver_fs_get_volume_status,
- .destructor = yfs_get_volume_status_call_destructor,
+ .destructor = afs_flat_call_destructor,
};
/*
@@ -1735,28 +1560,21 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
struct afs_call *call;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
- void *tmpbuf;
_enter("");
- tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
- if (!tmpbuf)
- return -ENOMEM;
-
call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus,
sizeof(__be32) * 2 +
sizeof(struct yfs_xdr_u64),
- sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
- sizeof(__be32));
- if (!call) {
- kfree(tmpbuf);
+ max_t(size_t,
+ sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+ sizeof(__be32),
+ AFSOPAQUEMAX + 1));
+ if (!call)
return -ENOMEM;
- }
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[1] = vs;
- call->reply[2] = tmpbuf;
+ call->out_volstatus = vs;
/* marshall the parameters */
bp = call->request;
@@ -1767,39 +1585,12 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * Deliver reply data to operations that just return a file status and a volume
- * sync record.
- */
-static int yfs_deliver_status_and_volsync(struct afs_call *call)
-{
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- _enter("{%u}", call->unmarshall);
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSVolSync(&bp, NULL);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
-/*
* YFS.SetLock operation type
*/
static const struct afs_call_type yfs_RXYFSSetLock = {
@@ -1834,7 +1625,8 @@ static const struct afs_call_type yfs_RXYFSReleaseLock = {
/*
* Set a lock on a file
*/
-int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1853,8 +1645,8 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1866,6 +1658,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_calli(call, &vnode->fid, type);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1873,7 +1666,7 @@ int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
/*
* extend a lock on a file
*/
-int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
+int yfs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1891,8 +1684,8 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
- call->want_reply_time = true;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1903,6 +1696,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -1910,7 +1704,7 @@ int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
/*
* release a lock on a file
*/
-int yfs_fs_release_lock(struct afs_fs_cursor *fc)
+int yfs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -1928,7 +1722,8 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc)
return -ENOMEM;
call->key = fc->key;
- call->reply[0] = vnode;
+ call->lvnode = vnode;
+ call->out_scb = scb;
/* marshall the parameters */
bp = call->request;
@@ -1939,48 +1734,18 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc)
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
/*
- * Deliver reply data to an FS.FetchStatus with no vnode.
- */
-static int yfs_deliver_fs_fetch_status(struct afs_call *call)
-{
- struct afs_file_status *status = call->reply[1];
- struct afs_callback *callback = call->reply[2];
- struct afs_volsync *volsync = call->reply[3];
- struct afs_vnode *vnode = call->reply[0];
- const __be32 *bp;
- int ret;
-
- ret = afs_transfer_reply(call);
- if (ret < 0)
- return ret;
-
- _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
-
- /* unmarshall the reply once we've received all of it */
- bp = call->buffer;
- ret = yfs_decode_status(call, &bp, status, vnode,
- &call->expected_version, NULL);
- if (ret < 0)
- return ret;
- xdr_decode_YFSCallBack_raw(&bp, callback);
- xdr_decode_YFSVolSync(&bp, volsync);
-
- _leave(" = 0 [done]");
- return 0;
-}
-
-/*
* YFS.FetchStatus operation type
*/
static const struct afs_call_type yfs_RXYFSFetchStatus = {
.name = "YFS.FetchStatus",
.op = yfs_FS_FetchStatus,
- .deliver = yfs_deliver_fs_fetch_status,
+ .deliver = yfs_deliver_fs_status_cb_and_volsync,
.destructor = afs_flat_call_destructor,
};
@@ -1990,8 +1755,7 @@ static const struct afs_call_type yfs_RXYFSFetchStatus = {
int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fid,
- struct afs_file_status *status,
- struct afs_callback *callback,
+ struct afs_status_cb *scb,
struct afs_volsync *volsync)
{
struct afs_call *call;
@@ -2012,11 +1776,8 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = status;
- call->reply[2] = callback;
- call->reply[3] = volsync;
- call->expected_version = 1; /* vnode->status.data_version */
+ call->out_scb = scb;
+ call->out_volsync = volsync;
/* marshall the parameters */
bp = call->request;
@@ -2025,9 +1786,9 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, fid);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -2037,9 +1798,7 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
*/
static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
{
- struct afs_file_status *statuses;
- struct afs_callback *callbacks;
- struct afs_vnode *vnode = call->reply[0];
+ struct afs_status_cb *scb;
const __be32 *bp;
u32 tmp;
int ret;
@@ -2078,10 +1837,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- statuses = call->reply[1];
- ret = yfs_decode_status(call, &bp, &statuses[call->count],
- call->count == 0 ? vnode : NULL,
- NULL, NULL);
+ scb = &call->out_scb[call->count];
+ ret = xdr_decode_YFSFetchStatus(&bp, call, scb);
if (ret < 0)
return ret;
@@ -2120,13 +1877,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
_debug("unmarshall CB array");
bp = call->buffer;
- callbacks = call->reply[2];
- xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]);
- statuses = call->reply[1];
- if (call->count == 0 && vnode && statuses[0].abort_code == 0) {
- bp = call->buffer;
- xdr_decode_YFSCallBack(call, vnode, &bp);
- }
+ scb = &call->out_scb[call->count];
+ xdr_decode_YFSCallBack(&bp, call, scb);
call->count++;
if (call->count < call->count2)
goto more_cbs;
@@ -2141,7 +1893,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
return ret;
bp = call->buffer;
- xdr_decode_YFSVolSync(&bp, call->reply[3]);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
@@ -2170,8 +1922,7 @@ static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
struct afs_net *net,
struct afs_fid *fids,
- struct afs_file_status *statuses,
- struct afs_callback *callbacks,
+ struct afs_status_cb *statuses,
unsigned int nr_fids,
struct afs_volsync *volsync)
{
@@ -2194,10 +1945,8 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
}
call->key = fc->key;
- call->reply[0] = NULL; /* vnode for fid[0] */
- call->reply[1] = statuses;
- call->reply[2] = callbacks;
- call->reply[3] = volsync;
+ call->out_scb = statuses;
+ call->out_volsync = volsync;
call->count2 = nr_fids;
/* marshall the parameters */
@@ -2209,9 +1958,9 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, &fids[i]);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &fids[0]);
+ afs_set_fc_call(call, fc);
afs_make_call(&fc->ac, call, GFP_NOFS);
return afs_wait_for_call_to_complete(call, &fc->ac);
}
@@ -2221,9 +1970,7 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
*/
static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
{
- struct afs_volsync *volsync = call->reply[2];
- struct afs_vnode *vnode = call->reply[1];
- struct yfs_acl *yacl = call->reply[0];
+ struct yfs_acl *yacl = call->out_yacl;
struct afs_acl *acl;
const __be32 *bp;
unsigned int size;
@@ -2308,11 +2055,10 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
bp = call->buffer;
yacl->inherit_flag = ntohl(*bp++);
yacl->num_cleaned = ntohl(*bp++);
- ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
- &call->expected_version, NULL);
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
if (ret < 0)
return ret;
- xdr_decode_YFSVolSync(&bp, volsync);
+ xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
@@ -2333,12 +2079,6 @@ void yfs_free_opaque_acl(struct yfs_acl *yacl)
}
}
-static void yfs_destroy_fs_fetch_opaque_acl(struct afs_call *call)
-{
- yfs_free_opaque_acl(call->reply[0]);
- afs_flat_call_destructor(call);
-}
-
/*
* YFS.FetchOpaqueACL operation type
*/
@@ -2346,18 +2086,18 @@ static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = {
.name = "YFS.FetchOpaqueACL",
.op = yfs_FS_FetchOpaqueACL,
.deliver = yfs_deliver_fs_fetch_opaque_acl,
- .destructor = yfs_destroy_fs_fetch_opaque_acl,
+ .destructor = afs_flat_call_destructor,
};
/*
* Fetch the YFS advanced ACLs for a file.
*/
struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc,
- unsigned int flags)
+ struct yfs_acl *yacl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
- struct yfs_acl *yacl;
struct afs_net *net = afs_v2net(vnode);
__be32 *bp;
@@ -2370,19 +2110,15 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc,
sizeof(__be32) * 2 +
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSVolSync));
- if (!call)
- goto nomem;
-
- yacl = kzalloc(sizeof(struct yfs_acl), GFP_KERNEL);
- if (!yacl)
- goto nomem_call;
+ if (!call) {
+ fc->ac.error = -ENOMEM;
+ return ERR_PTR(-ENOMEM);
+ }
- yacl->flags = flags;
call->key = fc->key;
- call->reply[0] = yacl;
- call->reply[1] = vnode;
- call->reply[2] = NULL; /* volsync */
- call->ret_reply0 = true;
+ call->out_yacl = yacl;
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
@@ -2391,17 +2127,10 @@ struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc,
bp = xdr_encode_YFSFid(bp, &vnode->fid);
yfs_check_req(call, bp);
- call->cb_break = fc->cb_break;
afs_use_fs_server(call, fc->cbi);
trace_afs_make_fs_call(call, &vnode->fid);
afs_make_call(&fc->ac, call, GFP_KERNEL);
return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac);
-
-nomem_call:
- afs_put_call(call);
-nomem:
- fc->ac.error = -ENOMEM;
- return ERR_PTR(-ENOMEM);
}
/*
@@ -2417,7 +2146,8 @@ static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = {
/*
* Store the YFS ACL for a file.
*/
-int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl)
+int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl,
+ struct afs_status_cb *scb)
{
struct afs_vnode *vnode = fc->vnode;
struct afs_call *call;
@@ -2441,8 +2171,8 @@ int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl
}
call->key = fc->key;
- call->reply[0] = vnode;
- call->reply[2] = NULL; /* volsync */
+ call->out_scb = scb;
+ call->out_volsync = NULL;
/* marshall the parameters */
bp = call->request;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7d09d125f148..fa9e99a962e0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -524,6 +524,19 @@ static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
+/*
+ * Convert the PF_R/PF_W/PF_X bits of an ELF program header's p_flags into
+ * the corresponding PROT_READ/PROT_WRITE/PROT_EXEC bits.
+ */
+static inline int make_prot(u32 p_flags)
+{
+	return ((p_flags & PF_R) ? PROT_READ : 0) |
+	       ((p_flags & PF_W) ? PROT_WRITE : 0) |
+	       ((p_flags & PF_X) ? PROT_EXEC : 0);
+}
+
/* This is much more generalized than the library routine read function,
so we keep this separate. Technically the library read function
is only provided so that we can read a.out libraries that have
@@ -563,16 +576,10 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
if (eppnt->p_type == PT_LOAD) {
int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
- int elf_prot = 0;
+ int elf_prot = make_prot(eppnt->p_flags);
unsigned long vaddr = 0;
unsigned long k, map_addr;
- if (eppnt->p_flags & PF_R)
- elf_prot = PROT_READ;
- if (eppnt->p_flags & PF_W)
- elf_prot |= PROT_WRITE;
- if (eppnt->p_flags & PF_X)
- elf_prot |= PROT_EXEC;
vaddr = eppnt->p_vaddr;
if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
elf_type |= MAP_FIXED_NOREPLACE;
@@ -687,7 +694,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias = 0;
int load_addr_set = 0;
- char * elf_interpreter = NULL;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
unsigned long elf_bss, elf_brk;
@@ -698,13 +704,12 @@ static int load_elf_binary(struct linux_binprm *bprm)
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc __maybe_unused = 0;
int executable_stack = EXSTACK_DEFAULT;
- struct pt_regs *regs = current_pt_regs();
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc;
struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
- loff_t pos;
+ struct pt_regs *regs;
loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
@@ -734,69 +739,66 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out;
elf_ppnt = elf_phdata;
- elf_bss = 0;
- elf_brk = 0;
+ for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
+ char *elf_interpreter;
+ loff_t pos;
- start_code = ~0UL;
- end_code = 0;
- start_data = 0;
- end_data = 0;
+ if (elf_ppnt->p_type != PT_INTERP)
+ continue;
- for (i = 0; i < loc->elf_ex.e_phnum; i++) {
- if (elf_ppnt->p_type == PT_INTERP) {
- /* This is the program interpreter used for
- * shared libraries - for now assume that this
- * is an a.out format binary
- */
- retval = -ENOEXEC;
- if (elf_ppnt->p_filesz > PATH_MAX ||
- elf_ppnt->p_filesz < 2)
- goto out_free_ph;
-
- retval = -ENOMEM;
- elf_interpreter = kmalloc(elf_ppnt->p_filesz,
- GFP_KERNEL);
- if (!elf_interpreter)
- goto out_free_ph;
-
- pos = elf_ppnt->p_offset;
- retval = kernel_read(bprm->file, elf_interpreter,
- elf_ppnt->p_filesz, &pos);
- if (retval != elf_ppnt->p_filesz) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_interp;
- }
- /* make sure path is NULL terminated */
- retval = -ENOEXEC;
- if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
- goto out_free_interp;
+ /*
+ * This is the program interpreter used for shared libraries -
+ * for now assume that this is an a.out format binary.
+ */
+ retval = -ENOEXEC;
+ if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
+ goto out_free_ph;
- interpreter = open_exec(elf_interpreter);
- retval = PTR_ERR(interpreter);
- if (IS_ERR(interpreter))
- goto out_free_interp;
+ retval = -ENOMEM;
+ elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
+ if (!elf_interpreter)
+ goto out_free_ph;
- /*
- * If the binary is not readable then enforce
- * mm->dumpable = 0 regardless of the interpreter's
- * permissions.
- */
- would_dump(bprm, interpreter);
-
- /* Get the exec headers */
- pos = 0;
- retval = kernel_read(interpreter, &loc->interp_elf_ex,
- sizeof(loc->interp_elf_ex), &pos);
- if (retval != sizeof(loc->interp_elf_ex)) {
- if (retval >= 0)
- retval = -EIO;
- goto out_free_dentry;
- }
+ pos = elf_ppnt->p_offset;
+ retval = kernel_read(bprm->file, elf_interpreter,
+ elf_ppnt->p_filesz, &pos);
+ if (retval != elf_ppnt->p_filesz) {
+ if (retval >= 0)
+ retval = -EIO;
+ goto out_free_interp;
+ }
+ /* make sure path is NULL terminated */
+ retval = -ENOEXEC;
+ if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
+ goto out_free_interp;
- break;
+ interpreter = open_exec(elf_interpreter);
+ kfree(elf_interpreter);
+ retval = PTR_ERR(interpreter);
+ if (IS_ERR(interpreter))
+ goto out_free_ph;
+
+ /*
+ * If the binary is not readable then enforce mm->dumpable = 0
+ * regardless of the interpreter's permissions.
+ */
+ would_dump(bprm, interpreter);
+
+ /* Get the exec headers */
+ pos = 0;
+ retval = kernel_read(interpreter, &loc->interp_elf_ex,
+ sizeof(loc->interp_elf_ex), &pos);
+ if (retval != sizeof(loc->interp_elf_ex)) {
+ if (retval >= 0)
+ retval = -EIO;
+ goto out_free_dentry;
}
- elf_ppnt++;
+
+ break;
+
+out_free_interp:
+ kfree(elf_interpreter);
+ goto out_free_ph;
}
elf_ppnt = elf_phdata;
@@ -819,7 +821,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
}
/* Some simple consistency checks for the interpreter */
- if (elf_interpreter) {
+ if (interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
@@ -884,13 +886,19 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (retval < 0)
goto out_free_dentry;
- current->mm->start_stack = bprm->p;
+ elf_bss = 0;
+ elf_brk = 0;
+
+ start_code = ~0UL;
+ end_code = 0;
+ start_data = 0;
+ end_data = 0;
/* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
- int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
+ int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
unsigned long k, vaddr;
unsigned long total_size = 0;
@@ -931,12 +939,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_fixed = MAP_FIXED;
}
- if (elf_ppnt->p_flags & PF_R)
- elf_prot |= PROT_READ;
- if (elf_ppnt->p_flags & PF_W)
- elf_prot |= PROT_WRITE;
- if (elf_ppnt->p_flags & PF_X)
- elf_prot |= PROT_EXEC;
+ elf_prot = make_prot(elf_ppnt->p_flags);
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
@@ -978,7 +981,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
* independently randomized mmap region (0 load_bias
* without MAP_FIXED).
*/
- if (elf_interpreter) {
+ if (interpreter) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
@@ -1076,7 +1079,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
goto out_free_dentry;
}
- if (elf_interpreter) {
+ if (interpreter) {
unsigned long interp_map_addr = 0;
elf_entry = load_elf_interp(&loc->interp_elf_ex,
@@ -1100,7 +1103,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
allow_write_access(interpreter);
fput(interpreter);
- kfree(elf_interpreter);
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
@@ -1115,7 +1117,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
- retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
+ retval = arch_setup_additional_pages(bprm, !!interpreter);
if (retval < 0)
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
@@ -1132,6 +1134,17 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->start_stack = bprm->p;
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
+ /*
+ * For architectures with ELF randomization, when executing
+ * a loader directly (i.e. no interpreter listed in ELF
+ * headers), move the brk area out of the mmap region
+ * (since it grows up, and may collide early with the stack
+ * growing down), and into the unused ELF_ET_DYN_BASE region.
+ */
+ if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
+ current->mm->brk = current->mm->start_brk =
+ ELF_ET_DYN_BASE;
+
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
@@ -1148,6 +1161,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
MAP_FIXED | MAP_PRIVATE, 0);
}
+ regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
/*
* The ABI may specify that certain registers be set up in special
@@ -1176,8 +1190,6 @@ out_free_dentry:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
-out_free_interp:
- kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
@@ -1456,8 +1468,6 @@ static void fill_elf_header(struct elfhdr *elf, int segs,
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = segs;
-
- return;
}
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
@@ -1470,7 +1480,6 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
phdr->p_memsz = 0;
phdr->p_flags = 0;
phdr->p_align = 0;
- return;
}
static void fill_note(struct memelfnote *note, const char *name, int type,
@@ -1480,7 +1489,6 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
note->type = type;
note->datasz = sz;
note->data = data;
- return;
}
/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f80045048bb7..0f7552a87d54 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,7 +29,6 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/dax.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1645fcfd9691..d27720cd3664 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -20,7 +20,6 @@
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/slab.h>
-#include <linux/xattr.h>
#include "internal.h"
#define CACHEFILES_KEYBUF_SIZE 512
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 36a8dc699448..72f8e1311392 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
int have = ci->i_snap_caps;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p snap issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(have),
ceph_cap_string(mask));
return 1;
@@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
if (!__cap_is_valid(cap))
continue;
if ((cap->issued & mask) == mask) {
- dout("__ceph_caps_issued_mask %p cap %p issued %s"
- " (mask %s)\n", &ci->vfs_inode, cap,
+ dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino, cap,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
@@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
/* does a combination of caps satisfy mask? */
have |= cap->issued;
if ((have & mask) == mask) {
- dout("__ceph_caps_issued_mask %p combo issued %s"
- " (mask %s)\n", &ci->vfs_inode,
+ dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s"
+ " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch) {
@@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync)
goto out;
- inode_lock(inode);
-
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
@@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
* to (when applicable), and check against max_size here as well.
* Note that caller is responsible for ensuring max_size increases are
* requested from the MDS.
+ *
+ * Returns 0 if caps were not able to be acquired (yet), a 1 if they were,
+ * or a negative error code.
+ *
+ * FIXME: how does a 0 return differ from -EAGAIN?
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- loff_t endoff, bool nonblock, int *got, int *err)
+ loff_t endoff, bool nonblock, int *got)
{
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
@@ -2550,8 +2552,7 @@ again:
if ((file_wanted & need) != need) {
dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
ceph_cap_string(need), ceph_cap_string(file_wanted));
- *err = -EBADF;
- ret = 1;
+ ret = -EBADF;
goto out_unlock;
}
@@ -2572,10 +2573,8 @@ again:
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
inode, endoff, ci->i_max_size);
- if (endoff > ci->i_requested_max_size) {
- *err = -EAGAIN;
- ret = 1;
- }
+ if (endoff > ci->i_requested_max_size)
+ ret = -EAGAIN;
goto out_unlock;
}
/*
@@ -2610,8 +2609,7 @@ again:
* task isn't in TASK_RUNNING state
*/
if (nonblock) {
- *err = -EAGAIN;
- ret = 1;
+ ret = -EAGAIN;
goto out_unlock;
}
@@ -2640,8 +2638,7 @@ again:
if (session_readonly) {
dout("get_cap_refs %p needed %s but mds%d readonly\n",
inode, ceph_cap_string(need), ci->i_auth_cap->mds);
- *err = -EROFS;
- ret = 1;
+ ret = -EROFS;
goto out_unlock;
}
@@ -2650,16 +2647,14 @@ again:
if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
- *err = -EIO;
- ret = 1;
+ ret = -EIO;
goto out_unlock;
}
mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
- *err = -ESTALE;
- ret = 1;
+ ret = -ESTALE;
goto out_unlock;
}
if (!(file_wanted & ~mds_wanted))
@@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
bool nonblock, int *got)
{
- int ret, err = 0;
+ int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD);
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
@@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
if (ret < 0)
return ret;
- ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err);
- if (ret) {
- if (err == -EAGAIN) {
- ret = 0;
- } else if (err < 0) {
- ret = err;
- }
- }
- return ret;
+ ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
+ return ret == -EAGAIN ? 0 : ret;
}
/*
@@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, int *got, struct page **pinned_page)
{
- int _got, ret, err = 0;
+ int _got, ret;
ret = ceph_pool_perm_check(ci, need);
if (ret < 0)
@@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
if (endoff > 0)
check_max_size(&ci->vfs_inode, endoff);
- err = 0;
_got = 0;
ret = try_get_cap_refs(ci, need, want, endoff,
- false, &_got, &err);
- if (ret) {
- if (err == -EAGAIN)
- continue;
- if (err < 0)
- ret = err;
- } else {
+ false, &_got);
+ if (ret == -EAGAIN) {
+ continue;
+ } else if (!ret) {
+ int err;
+
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait);
- while (!try_get_cap_refs(ci, need, want, endoff,
- true, &_got, &err)) {
+ while (!(err = try_get_cap_refs(ci, need, want, endoff,
+ true, &_got))) {
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
@@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
}
remove_wait_queue(&ci->i_cap_wq, &wait);
-
if (err == -EAGAIN)
continue;
- if (err < 0)
- ret = err;
}
- if (ret < 0) {
- if (err == -ESTALE) {
- /* session was killed, try renew caps */
- ret = ceph_renew_caps(&ci->vfs_inode);
- if (ret == 0)
- continue;
- }
+ if (ret == -ESTALE) {
+ /* session was killed, try renew caps */
+ ret = ceph_renew_caps(&ci->vfs_inode);
+ if (ret == 0)
+ continue;
return ret;
}
@@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
}
/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it
+ * For a soon-to-be unlinked file, drop the LINK caps. If it
* looks like the link count will hit 0, drop any other caps (other
* than PIN) we don't specifically want (due to the file still being
* open).
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 98365e74cb4a..b3fc5fe26a1a 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
int state = mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
- ceph_pr_addr(&addr->in_addr),
+ ceph_pr_addr(addr),
ceph_mds_state_name(state));
}
return 0;
@@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_dentry,
path ? path : "");
spin_unlock(&req->r_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path1) {
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
req->r_path1);
@@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_old_dentry,
path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock);
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
if (req->r_ino2.ino)
seq_printf(s, " #%llx/%s", req->r_ino2.ino,
@@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
+/*
+ * Callback handed to ceph_iterate_session_caps() by caps_show(): prints one
+ * row with the inode number and the cap's issued and implemented cap strings
+ * to the seq_file passed in @p.  Always returns 0.
+ */
+static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
+{
+	seq_printf((struct seq_file *)p, "0x%-17lx%-17s%-17s\n",
+		   inode->i_ino,
+		   ceph_cap_string(cap->issued),
+		   ceph_cap_string(cap->implemented));
+
+	return 0;
+}
+
static int caps_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
- int total, avail, used, reserved, min;
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ int total, avail, used, reserved, min, i;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n"
"avail\t\t%d\n"
"used\t\t%d\n"
"reserved\t%d\n"
- "min\t%d\n",
+ "min\t\t%d\n\n",
total, avail, used, reserved, min);
+ seq_printf(s, "ino issued implemented\n"); /* column header for the rows printed by caps_show_cb() */
+ seq_printf(s, "-----------------------------------------------\n");
+
+ mutex_lock(&mdsc->mutex); /* held around each session lookup */
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *session;
+
+ session = __ceph_lookup_mds_session(mdsc, i);
+ if (!session)
+ continue; /* nothing to print for this index */
+ mutex_unlock(&mdsc->mutex); /* mdsc->mutex is not held while s_mutex is taken */
+ mutex_lock(&session->s_mutex);
+ ceph_iterate_session_caps(session, caps_show_cb, s); /* caps_show_cb() receives the seq_file as its argument */
+ mutex_unlock(&session->s_mutex);
+ ceph_put_mds_session(session); /* NOTE(review): presumably drops a reference taken by the lookup above - confirm */
+ mutex_lock(&mdsc->mutex); /* re-taken for the next lookup and the final unlock */
+ }
+ mutex_unlock(&mdsc->mutex);
+
return 0;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 3c59ad180ef0..d3ef7ee429ec 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -22,18 +22,77 @@ struct ceph_nfs_confh {
u64 ino, parent_ino;
} __attribute__ ((packed));
+/*
+ * fh for snapped inode
+ *
+ * Filled in by ceph_encode_snapfh() and decoded by __snapfh_to_dentry().
+ * __snapfh_to_dentry() only uses parent_ino/hash as a lookup hint when
+ * ino != parent_ino.
+ */
+struct ceph_nfs_snapfh {
+ u64 ino; /* ceph_ino() of the encoded inode */
+ u64 snapid; /* ceph_snap() of the encoded inode */
+ u64 parent_ino; /* ceph_ino() of the parent directory, when one is recorded */
+ u32 hash; /* ceph_dentry_hash() of the dentry in that parent, or 0 */
+} __attribute__ ((packed));
+
+/*
+ * Encode an NFS file handle for a snapped inode (called from
+ * ceph_encode_fh() for any inode whose snap is not CEPH_NOSNAP).
+ *
+ * @inode:        inode to encode
+ * @rawfh:        output buffer, written as a struct ceph_nfs_snapfh
+ * @max_len:      in: capacity of @rawfh in 32-bit words;
+ *                out: number of words required/used
+ * @parent_inode: not used here; the parent is taken from a dentry alias
+ *
+ * Returns FILEID_BTRFS_WITH_PARENT on success, FILEID_INVALID when @rawfh
+ * is too small (*@max_len is then set to the required length), or -EINVAL
+ * when the inode has no dentry alias, or when no parent is recorded and
+ * the inode is not a directory.
+ */
+static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
+			      struct inode *parent_inode)
+{
+	static const int snap_handle_length =
+		sizeof(struct ceph_nfs_snapfh) >> 2;
+	struct ceph_nfs_snapfh *sfh = (void *)rawfh;
+	u64 snapid = ceph_snap(inode);
+	int ret;
+	bool no_parent = true;
+
+	if (*max_len < snap_handle_length) {
+		*max_len = snap_handle_length;
+		ret = FILEID_INVALID;
+		goto out;
+	}
+
+	ret = -EINVAL;
+	if (snapid != CEPH_SNAPDIR) {
+		struct inode *dir;
+		struct dentry *dentry = d_find_alias(inode);
+
+		if (!dentry)
+			goto out;
+
+		/* Record the parent only if it is not itself a snapdir. */
+		rcu_read_lock();
+		dir = d_inode_rcu(dentry->d_parent);
+		if (ceph_snap(dir) != CEPH_SNAPDIR) {
+			sfh->parent_ino = ceph_ino(dir);
+			sfh->hash = ceph_dentry_hash(dir, dentry);
+			no_parent = false;
+		}
+		rcu_read_unlock();
+		dput(dentry);
+	}
+
+	if (no_parent) {
+		if (!S_ISDIR(inode->i_mode))
+			goto out;
+		/*
+		 * parent_ino == ino is how __snapfh_to_dentry() recognises a
+		 * handle without a separate parent.  Use the inode number
+		 * directly: sfh->ino has not been written yet at this point,
+		 * so copying from it would store whatever the caller's buffer
+		 * happened to contain.
+		 */
+		sfh->parent_ino = ceph_ino(inode);
+		sfh->hash = 0;
+	}
+	sfh->ino = ceph_ino(inode);
+	sfh->snapid = snapid;
+
+	*max_len = snap_handle_length;
+	ret = FILEID_BTRFS_WITH_PARENT;
+out:
+	dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
+	return ret;
+}
+
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode)
{
+ const static int handle_length =
+ sizeof(struct ceph_nfs_fh) >> 2;
+ const static int connected_handle_length =
+ sizeof(struct ceph_nfs_confh) >> 2;
int type;
- struct ceph_nfs_fh *fh = (void *)rawfh;
- struct ceph_nfs_confh *cfh = (void *)rawfh;
- int connected_handle_length = sizeof(*cfh)/4;
- int handle_length = sizeof(*fh)/4;
- /* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP)
- return -EINVAL;
+ return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
if (parent_inode && (*max_len < connected_handle_length)) {
*max_len = connected_handle_length;
@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
}
if (parent_inode) {
+ struct ceph_nfs_confh *cfh = (void *)rawfh;
dout("encode_fh %llx with parent %llx\n",
ceph_ino(inode), ceph_ino(parent_inode));
cfh->ino = ceph_ino(inode);
@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
*max_len = connected_handle_length;
type = FILEID_INO32_GEN_PARENT;
} else {
+ struct ceph_nfs_fh *fh = (void *)rawfh;
dout("encode_fh %llx\n", ceph_ino(inode));
fh->ino = ceph_ino(inode);
*max_len = handle_length;
@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
return type;
}
-static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
@@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED;
- req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_args.lookupino.mask = cpu_to_le32(mask);
req->r_ino1 = vino;
req->r_num_caps = 1;
@@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
ihold(inode);
ceph_mdsc_put_request(req);
if (!inode)
- return ERR_PTR(-ESTALE);
- if (inode->i_nlink == 0) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
+ return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
}
+ return inode;
+}
+
+/*
+ * Look up an inode by number via __lookup_inode(), rejecting inodes whose
+ * link count is zero.  Returns the inode, the ERR_PTR from __lookup_inode(),
+ * or ERR_PTR(-ESTALE) for an unlinked inode (its reference is dropped).
+ */
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
+{
+	struct inode *found = __lookup_inode(sb, ino);
+
+	if (!IS_ERR(found) && found->i_nlink == 0) {
+		iput(found);
+		found = ERR_PTR(-ESTALE);
+	}
+	return found;
+}
+/*
+ * Resolve an inode number to a dentry.  ceph_lookup_inode() performs the
+ * lookup and turns an unlinked inode (i_nlink == 0) into -ESTALE; any error
+ * is passed back as an ERR_PTR dentry.
+ */
+static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
+{
+	struct inode *inode = ceph_lookup_inode(sb, ino);
+
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
return d_obtain_alias(inode);
}
+/*
+ * Convert a snapped-inode file handle into a dentry.
+ *
+ * @sb:          superblock to look the inode up in
+ * @sfh:         decoded handle (see ceph_encode_snapfh())
+ * @want_parent: false to resolve the encoded inode itself, true to resolve
+ *               the parent recorded in the handle
+ *
+ * The inode is first looked up locally with ceph_find_inode(); on a miss a
+ * CEPH_MDS_OP_LOOKUPINO request is sent.  Returns the dentry, an ERR_PTR
+ * from request creation or ceph_get_snapdir(), or ERR_PTR(-ESTALE) when no
+ * usable inode was obtained.
+ */
+static struct dentry *__snapfh_to_dentry(struct super_block *sb,
+					  struct ceph_nfs_snapfh *sfh,
+					  bool want_parent)
+{
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
+	struct ceph_mds_request *req;
+	struct inode *inode;
+	struct ceph_vino vino;
+	int mask;
+	int err;
+	bool unlinked = false;
+
+	if (want_parent) {
+		vino.ino = sfh->parent_ino;
+		if (sfh->snapid == CEPH_SNAPDIR)
+			vino.snap = CEPH_NOSNAP;
+		else if (sfh->ino == sfh->parent_ino)
+			vino.snap = CEPH_SNAPDIR;
+		else
+			vino.snap = sfh->snapid;
+	} else {
+		vino.ino = sfh->ino;
+		vino.snap = sfh->snapid;
+	}
+	inode = ceph_find_inode(sb, vino);
+	if (inode)
+		return d_obtain_alias(inode);
+
+	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+				       USE_ANY_MDS);
+	if (IS_ERR(req))
+		return ERR_CAST(req);
+
+	mask = CEPH_STAT_CAP_INODE;
+	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+		mask |= CEPH_CAP_XATTR_SHARED;
+	req->r_args.lookupino.mask = cpu_to_le32(mask);
+	if (vino.snap < CEPH_NOSNAP) {
+		req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
+		/* parent/hash are only sent when the handle recorded them */
+		if (!want_parent && sfh->ino != sfh->parent_ino) {
+			req->r_args.lookupino.parent =
+					cpu_to_le64(sfh->parent_ino);
+			req->r_args.lookupino.hash =
+					cpu_to_le32(sfh->hash);
+		}
+	}
+
+	req->r_ino1 = vino;
+	req->r_num_caps = 1;
+	err = ceph_mdsc_do_request(mdsc, NULL, req);
+	inode = req->r_target_inode;
+	if (inode) {
+		if (vino.snap == CEPH_SNAPDIR) {
+			if (inode->i_nlink == 0)
+				unlinked = true;
+			inode = ceph_get_snapdir(inode);
+		} else if (ceph_snap(inode) == vino.snap) {
+			ihold(inode);
+		} else {
+			/* mds does not support lookup snapped inode */
+			err = -EOPNOTSUPP;
+			inode = NULL;
+		}
+	}
+	ceph_mdsc_put_request(req);
+
+	if (want_parent) {
+		dout("snapfh_to_parent %llx.%llx err=%d\n",
+		     vino.ino, vino.snap, err);
+	} else {
+		dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d\n",
+		     vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
+	}
+	if (!inode)
+		return ERR_PTR(-ESTALE);
+	/* ceph_get_snapdir() may fail; ceph_get_parent() checks it the same way */
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+	/* see comments in ceph_get_parent() */
+	return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
+}
+
/*
* convert regular fh to dentry
*/
@@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
{
struct ceph_nfs_fh *fh = (void *)fid->raw;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, false);
+ }
+
if (fh_type != FILEID_INO32_GEN &&
fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
@@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb,
static struct dentry *ceph_get_parent(struct dentry *child)
{
- /* don't re-export snaps */
- if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
- return ERR_PTR(-EINVAL);
-
- dout("get_parent %p ino %llx.%llx\n",
- child, ceph_vinop(d_inode(child)));
- return __get_parent(child->d_sb, child, 0);
+ struct inode *inode = d_inode(child);
+ struct dentry *dn;
+
+ if (ceph_snap(inode) != CEPH_NOSNAP) {
+ struct inode* dir;
+ bool unlinked = false;
+ /* for snapped inodes, only directories are supported */
+ if (!d_is_dir(child)) {
+ dn = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
+ if (IS_ERR(dir)) {
+ dn = ERR_CAST(dir);
+ goto out;
+ }
+ /* There can be multiple paths to access a snapped inode.
+ * For simplicity, treat the snapdir of the head inode as parent. */
+ if (ceph_snap(inode) != CEPH_SNAPDIR) {
+ struct inode *snapdir = ceph_get_snapdir(dir);
+ if (dir->i_nlink == 0)
+ unlinked = true;
+ iput(dir);
+ if (IS_ERR(snapdir)) {
+ dn = ERR_CAST(snapdir);
+ goto out;
+ }
+ dir = snapdir;
+ }
+ /* If the directory has already been deleted, further get_parent
+ * calls will fail. Do not mark the snapdir dentry as disconnected;
+ * this prevents exportfs from doing further get_parent calls. */
+ if (unlinked)
+ dn = d_obtain_root(dir);
+ else
+ dn = d_obtain_alias(dir);
+ } else {
+ dn = __get_parent(child->d_sb, child, 0);
+ }
+out:
+ dout("get_parent %p ino %llx.%llx err=%ld\n",
+ child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
+ return dn;
}
/*
@@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
struct ceph_nfs_confh *cfh = (void *)fid->raw;
struct dentry *dentry;
+ if (fh_type == FILEID_BTRFS_WITH_PARENT) {
+ struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
+ return __snapfh_to_dentry(sb, sfh, true);
+ }
+
if (fh_type != FILEID_INO32_GEN_PARENT)
return NULL;
if (fh_len < sizeof(*cfh) / 4)
@@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
return dentry;
}
+static int __get_snap_name(struct dentry *parent, char *name,
+ struct dentry *child) /* snapped-inode helper for ceph_get_name(): fills @name, returns 0 or -errno */
+{
+ struct inode *inode = d_inode(child);
+ struct inode *dir = d_inode(parent);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_mds_request *req = NULL;
+ char *last_name = NULL;
+ unsigned next_offset = 2; /* NOTE(review): presumably skips the "." and ".." slots - confirm */
+ int err = -EINVAL; /* result for every unsupported parent/child combination below */
+
+ if (ceph_ino(inode) != ceph_ino(dir))
+ goto out; /* parent and child must share the same inode number */
+ if (ceph_snap(inode) == CEPH_SNAPDIR) {
+ if (ceph_snap(dir) == CEPH_NOSNAP) {
+ strcpy(name, fsc->mount_options->snapdir_name); /* NOTE(review): unbounded copy; assumes the caller's buffer fits snapdir_name - confirm */
+ err = 0;
+ }
+ goto out;
+ }
+ if (ceph_snap(dir) != CEPH_SNAPDIR)
+ goto out;
+
+ while (1) { /* one LSSNAP request per pass until a match, an error, or dir_end */
+ struct ceph_mds_reply_info_parsed *rinfo;
+ struct ceph_mds_reply_dir_entry *rde;
+ int i;
+
+ req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
+ USE_AUTH_MDS);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ req = NULL; /* so the out: path does not put an error pointer */
+ goto out;
+ }
+ err = ceph_alloc_readdir_reply_buffer(req, inode);
+ if (err)
+ goto out;
+
+ req->r_direct_mode = USE_AUTH_MDS;
+ req->r_readdir_offset = next_offset;
+ req->r_args.readdir.flags =
+ cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
+ if (last_name) {
+ req->r_path2 = last_name; /* last entry name of the previous pass */
+ last_name = NULL; /* no longer freed at out:; NOTE(review): presumably released with the request - confirm */
+ }
+
+ req->r_inode = dir;
+ ihold(dir);
+ req->r_dentry = dget(parent);
+
+ inode_lock(dir);
+ err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
+ inode_unlock(dir);
+
+ if (err < 0)
+ goto out;
+
+ rinfo = &req->r_reply_info;
+ for (i = 0; i < rinfo->dir_nr; i++) {
+ rde = rinfo->dir_entries + i;
+ BUG_ON(!rde->inode.in);
+ if (ceph_snap(inode) ==
+ le64_to_cpu(rde->inode.in->snapid)) { /* entry whose snapid matches the child's */
+ memcpy(name, rde->name, rde->name_len); /* NOTE(review): name_len is not checked against the size of @name - confirm the bound */
+ name[rde->name_len] = '\0';
+ err = 0;
+ goto out;
+ }
+ }
+
+ if (rinfo->dir_end)
+ break; /* listing finished without a match: -ENOENT below */
+
+ BUG_ON(rinfo->dir_nr <= 0);
+ rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
+ next_offset += rinfo->dir_nr;
+ last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL); /* remembered for the next request's r_path2 */
+ if (!last_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ceph_mdsc_put_request(req);
+ req = NULL;
+ }
+ err = -ENOENT;
+out:
+ if (req)
+ ceph_mdsc_put_request(req);
+ kfree(last_name);
+ dout("get_snap_name %p ino %llx.%llx err=%d\n",
+ child, ceph_vinop(inode), err);
+ return err;
+}
+
static int ceph_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct ceph_mds_client *mdsc;
struct ceph_mds_request *req;
+ struct inode *inode = d_inode(child);
int err;
- mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
+ if (ceph_snap(inode) != CEPH_NOSNAP)
+ return __get_snap_name(parent, name, child);
+
+ mdsc = ceph_inode_to_client(inode)->mdsc;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
USE_ANY_MDS);
if (IS_ERR(req))
@@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
inode_lock(d_inode(parent));
- req->r_inode = d_inode(child);
- ihold(d_inode(child));
+ req->r_inode = inode;
+ ihold(inode);
req->r_ino2 = ceph_vino(d_inode(parent));
req->r_parent = d_inode(parent);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
@@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name,
memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0;
dout("get_name %p ino %llx.%llx name %s\n",
- child, ceph_vinop(d_inode(child)), name);
+ child, ceph_vinop(inode), name);
} else {
dout("get_name %p ino %llx.%llx err %d\n",
- child, ceph_vinop(d_inode(child)), err);
+ child, ceph_vinop(inode), err);
}
ceph_mdsc_put_request(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 84725b53ac21..305daf043eb0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
(write ? "write" : "read"), file, pos, (unsigned)count,
- snapc, snapc->seq);
+ snapc, snapc ? snapc->seq : 0);
ret = filemap_write_and_wait_range(inode->i_mapping,
pos, pos + count - 1);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 35dae6d5493a..f85355bf49c4 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask)
return err;
}
+/* Craft a mask of needed caps given a set of requested statx attrs. */
+static int statx_to_caps(u32 want)
+{
+ int mask = 0;
+
+ if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME))
+ mask |= CEPH_CAP_AUTH_SHARED;
+
+ if (want & (STATX_NLINK|STATX_CTIME))
+ mask |= CEPH_CAP_LINK_SHARED;
+
+ if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
+ STATX_BLOCKS))
+ mask |= CEPH_CAP_FILE_SHARED;
+
+ if (want & (STATX_CTIME))
+ mask |= CEPH_CAP_XATTR_SHARED;
+
+ return mask;
+}
+
/*
- * Get all attributes. Hopefully somedata we'll have a statlite()
- * and can limit the fields we require to be accurate.
+ * Get all the attributes. If we have sufficient caps for the requested attrs,
+ * then we can avoid talking to the MDS at all.
*/
int ceph_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
- int err;
+ int err = 0;
- err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false);
- if (!err) {
- generic_fillattr(inode, stat);
- stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
- if (ceph_snap(inode) == CEPH_NOSNAP)
- stat->dev = inode->i_sb->s_dev;
+ /* Skip the getattr altogether if we're asked not to sync */
+ if (!(flags & AT_STATX_DONT_SYNC)) {
+ err = ceph_do_getattr(inode, statx_to_caps(request_mask),
+ flags & AT_STATX_FORCE_SYNC);
+ if (err)
+ return err;
+ }
+
+ generic_fillattr(inode, stat);
+ stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
+ if (ceph_snap(inode) == CEPH_NOSNAP)
+ stat->dev = inode->i_sb->s_dev;
+ else
+ stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
+
+ if (S_ISDIR(inode->i_mode)) {
+ if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
+ RBYTES))
+ stat->size = ci->i_rbytes;
else
- stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
-
- if (S_ISDIR(inode->i_mode)) {
- if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
- RBYTES))
- stat->size = ci->i_rbytes;
- else
- stat->size = ci->i_files + ci->i_subdirs;
- stat->blocks = 0;
- stat->blksize = 65536;
- /*
- * Some applications rely on the number of st_nlink
- * value on directories to be either 0 (if unlinked)
- * or 2 + number of subdirectories.
- */
- if (stat->nlink == 1)
- /* '.' + '..' + subdirs */
- stat->nlink = 1 + 1 + ci->i_subdirs;
- }
+ stat->size = ci->i_files + ci->i_subdirs;
+ stat->blocks = 0;
+ stat->blksize = 65536;
+ /*
+ * Some applications rely on the number of st_nlink
+ * value on directories to be either 0 (if unlinked)
+ * or 2 + number of subdirectories.
+ */
+ if (stat->nlink == 1)
+ /* '.' + '..' + subdirs */
+ stat->nlink = 1 + 1 + ci->i_subdirs;
}
+
+ /* Mask off any higher bits (e.g. btime) until we have support */
+ stat->result_mask = request_mask & STATX_BASIC_STATS;
return err;
}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 9dae2ec7e1fa..ac9b53b89365 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else if (op == CEPH_MDS_OP_SETFILELOCK) {
- /*
- * increasing i_filelock_ref closes race window between
- * handling request reply and adding file_lock struct to
- * inode. Otherwise, i_auth_cap may get trimmed in the
- * window. Caller function will decrease the counter.
- */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
@@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO;
- } else {
- /* see comment in ceph_lock */
- fl->fl_ops = &ceph_fl_lock_ops;
- atomic_inc(&ci->i_filelock_ref);
}
spin_unlock(&ci->i_ceph_lock);
if (err < 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9049c2a3e972..959b1bf7c327 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
int mds)
{
- struct ceph_mds_session *session;
-
if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
return NULL;
- session = mdsc->sessions[mds];
- dout("lookup_mds_session %p %d\n", session,
- refcount_read(&session->s_ref));
- get_session(session);
- return session;
+ return get_session(mdsc->sessions[mds]);
}
static bool __have_session(struct ceph_mds_client *mdsc, int mds)
@@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
*
* Caller must hold session s_mutex.
*/
-static int iterate_session_caps(struct ceph_mds_session *session,
- int (*cb)(struct inode *, struct ceph_cap *,
- void *), void *arg)
+int ceph_iterate_session_caps(struct ceph_mds_session *session,
+ int (*cb)(struct inode *, struct ceph_cap *,
+ void *), void *arg)
{
struct list_head *p;
struct ceph_cap *cap;
@@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
LIST_HEAD(dispose);
dout("remove_session_caps on %p\n", session);
- iterate_session_caps(session, remove_session_caps_cb, fsc);
+ ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);
wake_up_all(&fsc->mdsc->cap_flushing_wq);
@@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
- iterate_session_caps(session, wake_up_session_cb,
- (void *)(unsigned long)ev);
+ ceph_iterate_session_caps(session, wake_up_session_cb,
+ (void *)(unsigned long)ev);
}
/*
@@ -1768,7 +1762,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
session->s_mds, session->s_nr_caps, max_caps, trim_caps);
if (trim_caps > 0) {
session->s_trim_caps = trim_caps;
- iterate_session_caps(session, trim_caps_cb, session);
+ ceph_iterate_session_caps(session, trim_caps_cb, session);
dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
session->s_mds, session->s_nr_caps, max_caps,
trim_caps - session->s_trim_caps);
@@ -1861,7 +1855,8 @@ again:
num_cap_releases--;
head = msg->front.iov_base;
- le32_add_cpu(&head->num, 1);
+ put_unaligned_le32(get_unaligned_le32(&head->num) + 1,
+ &head->num);
item = msg->front.iov_base + msg->front.iov_len;
item->ino = cpu_to_le64(cap->cap_ino);
item->cap_id = cpu_to_le64(cap->cap_id);
@@ -2089,43 +2084,29 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
* Encode hidden .snap dirs as a double /, i.e.
* foo/.snap/bar -> foo//bar
*/
-char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
+char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
int stop_on_nosnap)
{
struct dentry *temp;
char *path;
- int len, pos;
+ int pos;
unsigned seq;
+ u64 base;
if (!dentry)
return ERR_PTR(-EINVAL);
-retry:
- len = 0;
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- for (temp = dentry; !IS_ROOT(temp);) {
- struct inode *inode = d_inode(temp);
- if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
- len++; /* slash only */
- else if (stop_on_nosnap && inode &&
- ceph_snap(inode) == CEPH_NOSNAP)
- break;
- else
- len += 1 + temp->d_name.len;
- temp = temp->d_parent;
- }
- rcu_read_unlock();
- if (len)
- len--; /* no leading '/' */
-
- path = kmalloc(len+1, GFP_NOFS);
+ path = __getname();
if (!path)
return ERR_PTR(-ENOMEM);
- pos = len;
- path[pos] = 0; /* trailing null */
+retry:
+ pos = PATH_MAX - 1;
+ path[pos] = '\0';
+
+ seq = read_seqbegin(&rename_lock);
rcu_read_lock();
- for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
+ temp = dentry;
+ for (;;) {
struct inode *inode;
spin_lock(&temp->d_lock);
@@ -2143,83 +2124,54 @@ retry:
spin_unlock(&temp->d_lock);
break;
}
- strncpy(path + pos, temp->d_name.name,
- temp->d_name.len);
+ memcpy(path + pos, temp->d_name.name, temp->d_name.len);
}
spin_unlock(&temp->d_lock);
- if (pos)
- path[--pos] = '/';
temp = temp->d_parent;
+
+ /* Are we at the root? */
+ if (IS_ROOT(temp))
+ break;
+
+ /* Are we out of buffer? */
+ if (--pos < 0)
+ break;
+
+ path[pos] = '/';
}
+ base = ceph_ino(d_inode(temp));
rcu_read_unlock();
- if (pos != 0 || read_seqretry(&rename_lock, seq)) {
+ if (pos < 0 || read_seqretry(&rename_lock, seq)) {
pr_err("build_path did not end path lookup where "
- "expected, namelen is %d, pos is %d\n", len, pos);
+ "expected, pos is %d\n", pos);
/* presumably this is only possible if racing with a
rename of one of the parent directories (we can not
lock the dentries above us to prevent this, but
retrying should be harmless) */
- kfree(path);
goto retry;
}
- *base = ceph_ino(d_inode(temp));
- *plen = len;
+ *pbase = base;
+ *plen = PATH_MAX - 1 - pos;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, d_count(dentry), *base, len, path);
- return path;
-}
-
-/* Duplicate the dentry->d_name.name safely */
-static int clone_dentry_name(struct dentry *dentry, const char **ppath,
- int *ppathlen)
-{
- u32 len;
- char *name;
-
-retry:
- len = READ_ONCE(dentry->d_name.len);
- name = kmalloc(len + 1, GFP_NOFS);
- if (!name)
- return -ENOMEM;
-
- spin_lock(&dentry->d_lock);
- if (dentry->d_name.len != len) {
- spin_unlock(&dentry->d_lock);
- kfree(name);
- goto retry;
- }
- memcpy(name, dentry->d_name.name, len);
- spin_unlock(&dentry->d_lock);
-
- name[len] = '\0';
- *ppath = name;
- *ppathlen = len;
- return 0;
+ dentry, d_count(dentry), base, *plen, path + pos);
+ return path + pos;
}
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
bool *pfreepath, bool parent_locked)
{
- int ret;
char *path;
rcu_read_lock();
if (!dir)
dir = d_inode_rcu(dentry->d_parent);
- if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
- if (parent_locked) {
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
- } else {
- ret = clone_dentry_name(dentry, ppath, ppathlen);
- if (ret)
- return ret;
- *pfreepath = true;
- }
+ *ppath = dentry->d_name.name;
+ *ppathlen = dentry->d_name.len;
return 0;
}
rcu_read_unlock();
@@ -2331,9 +2283,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
- len += req->r_dentry->d_name.len;
+ len += pathlen1;
if (req->r_old_dentry_drop)
- len += req->r_old_dentry->d_name.len;
+ len += pathlen2;
msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
if (!msg) {
@@ -2410,10 +2362,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
out_free2:
if (freepath2)
- kfree((char *)path2);
+ ceph_mdsc_free_path((char *)path2, pathlen2);
out_free1:
if (freepath1)
- kfree((char *)path1);
+ ceph_mdsc_free_path((char *)path1, pathlen1);
out:
return msg;
}
@@ -2427,8 +2379,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
{
if (req->r_callback)
req->r_callback(mdsc, req);
- else
- complete_all(&req->r_completion);
+ complete_all(&req->r_completion);
}
/*
@@ -2670,28 +2621,11 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
}
}
-void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
struct ceph_mds_request *req)
{
- dout("submit_request on %p\n", req);
- mutex_lock(&mdsc->mutex);
- __register_request(mdsc, req, NULL);
- __do_request(mdsc, req);
- mutex_unlock(&mdsc->mutex);
-}
-
-/*
- * Synchrously perform an mds request. Take care of all of the
- * session setup, forwarding, retry details.
- */
-int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
- struct inode *dir,
- struct ceph_mds_request *req)
-{
int err;
- dout("do_request on %p\n", req);
-
/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
@@ -2701,18 +2635,21 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
- /* issue */
+ dout("submit_request on %p for inode %p\n", req, dir);
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, dir);
__do_request(mdsc, req);
+ err = req->r_err;
+ mutex_unlock(&mdsc->mutex);
+ return err;
+}
- if (req->r_err) {
- err = req->r_err;
- goto out;
- }
+static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ int err;
/* wait */
- mutex_unlock(&mdsc->mutex);
dout("do_request waiting\n");
if (!req->r_timeout && req->r_wait_for_completion) {
err = req->r_wait_for_completion(mdsc, req);
@@ -2753,8 +2690,26 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
err = req->r_err;
}
-out:
mutex_unlock(&mdsc->mutex);
+ return err;
+}
+
+/*
+ * Synchronously perform an mds request. Take care of all of the
+ * session setup, forwarding, retry details.
+ */
+int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req)
+{
+ int err;
+
+ dout("do_request on %p\n", req);
+
+ /* issue */
+ err = ceph_mdsc_submit_request(mdsc, dir, req);
+ if (!err)
+ err = ceph_mdsc_wait_request(mdsc, req);
dout("do_request %p done, result %d\n", req, err);
return err;
}
@@ -3485,7 +3440,7 @@ out_freeflocks:
ceph_pagelist_encode_string(pagelist, path, pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
out_freepath:
- kfree(path);
+ ceph_mdsc_free_path(path, pathlen);
}
out_err:
@@ -3642,7 +3597,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
recon_state.msg_version = 2;
}
/* trsaverse this session's caps */
- err = iterate_session_caps(session, encode_caps_cb, &recon_state);
+ err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state);
spin_lock(&session->s_cap_lock);
session->s_cap_reconnect = 0;
@@ -4125,6 +4080,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
mdsc->max_sessions = 0;
mdsc->stopping = 0;
atomic64_set(&mdsc->quotarealms_count, 0);
+ mdsc->quotarealms_inodes = RB_ROOT;
+ mutex_init(&mdsc->quotarealms_inodes_mutex);
mdsc->last_snap_seq = 0;
init_rwsem(&mdsc->snap_rwsem);
mdsc->snap_realms = RB_ROOT;
@@ -4216,6 +4173,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
* their inode/dcache refs
*/
ceph_msgr_flush();
+
+ ceph_cleanup_quotarealms_inodes(mdsc);
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 50385a481fdb..a83f28bc2387 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -326,6 +326,18 @@ struct ceph_snapid_map {
};
/*
+ * rb-tree node for quotarealm inodes that are not visible from the filesystem
+ * mountpoint, but are required to handle, e.g., quotas.
+ */
+struct ceph_quotarealm_inode {
+ struct rb_node node;
+ u64 ino;
+ unsigned long timeout; /* after a failed lookup: jiffies until which no retry is made */
+ struct mutex mutex;
+ struct inode *inode;
+};
+
+/*
* mds client state
*/
struct ceph_mds_client {
@@ -344,6 +356,12 @@ struct ceph_mds_client {
int stopping; /* true if shutting down */
atomic64_t quotarealms_count; /* # realms with quota */
+ /*
+ * We keep a tree of inodes that we don't see in the mountpoint but that
+ * we need in order to track quota realms.
+ */
+ struct rb_root quotarealms_inodes;
+ struct mutex quotarealms_inodes_mutex;
/*
* snap_rwsem will cover cap linkage into snaprealms, and
@@ -447,8 +465,9 @@ extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
struct inode *dir);
extern struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
-extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
- struct ceph_mds_request *req);
+extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
+ struct inode *dir,
+ struct ceph_mds_request *req);
extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req);
@@ -468,8 +487,18 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
+extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
+ int (*cb)(struct inode *,
+ struct ceph_cap *, void *),
+ void *arg);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
+static inline void ceph_mdsc_free_path(char *path, int len)
+{
+ if (path)
+ __putname(path - (PATH_MAX - 1 - len));
+}
+
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
int stop_on_nosnap);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 1a2c5d390f7f..701b4fb0fb5a 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -205,7 +205,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
i+1, n, global_id, mds, inc,
- ceph_pr_addr(&addr.in_addr),
+ ceph_pr_addr(&addr),
ceph_mds_state_name(state));
if (mds < 0 || state <= 0)
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 9455d3aef0c3..c4522212872c 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
static inline bool ceph_has_realms_with_quotas(struct inode *inode)
{
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- return atomic64_read(&mdsc->quotarealms_count) > 0;
+ struct super_block *sb = mdsc->fsc->sb;
+
+ if (atomic64_read(&mdsc->quotarealms_count) > 0)
+ return true;
+ /* if root is the real CephFS root, we don't have quota realms */
+ if (sb->s_root->d_inode &&
+ (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
+ return false;
+ /* otherwise, we can't know for sure */
+ return true;
}
void ceph_handle_quota(struct ceph_mds_client *mdsc,
@@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
iput(inode);
}
+static struct ceph_quotarealm_inode *
+find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
+{
+ struct ceph_quotarealm_inode *qri = NULL;
+ struct rb_node **node, *parent = NULL;
+
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
+ node = &(mdsc->quotarealms_inodes.rb_node);
+ while (*node) {
+ parent = *node;
+ qri = container_of(*node, struct ceph_quotarealm_inode, node);
+
+ if (ino < qri->ino)
+ node = &((*node)->rb_left);
+ else if (ino > qri->ino)
+ node = &((*node)->rb_right);
+ else
+ break;
+ }
+ if (!qri || (qri->ino != ino)) {
+ /* Not found, create a new one and insert it */
+ qri = kmalloc(sizeof(*qri), GFP_KERNEL);
+ if (qri) {
+ qri->ino = ino;
+ qri->inode = NULL;
+ qri->timeout = 0;
+ mutex_init(&qri->mutex);
+ rb_link_node(&qri->node, parent, node);
+ rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
+ } else
+ pr_warn("Failed to alloc quotarealms_inode\n");
+ }
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
+
+ return qri;
+}
+
+/*
+ * This function will try to look up a realm inode which isn't visible in the
+ * filesystem mountpoint. A tree of this kind of (not visible) inode is
+ * maintained in the mdsc and freed only when the filesystem is umounted.
+ *
+ * Note that an entry is kept in this tree even if the lookup fails, which
+ * avoids repeating useless lookup requests.
+ */
+static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
+ struct super_block *sb,
+ struct ceph_snap_realm *realm)
+{
+ struct ceph_quotarealm_inode *qri;
+ struct inode *in;
+
+ qri = find_quotarealm_inode(mdsc, realm->ino);
+ if (!qri)
+ return NULL;
+
+ mutex_lock(&qri->mutex);
+ if (qri->inode) {
+ /* A request has already returned the inode */
+ mutex_unlock(&qri->mutex);
+ return qri->inode;
+ }
+ /* Check if this inode lookup has failed recently */
+ if (qri->timeout &&
+ time_before_eq(jiffies, qri->timeout)) {
+ mutex_unlock(&qri->mutex);
+ return NULL;
+ }
+ in = ceph_lookup_inode(sb, realm->ino);
+ if (IS_ERR(in)) {
+ pr_warn("Can't lookup inode %llx (err: %ld)\n",
+ realm->ino, PTR_ERR(in));
+ qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
+ } else {
+ qri->timeout = 0;
+ qri->inode = in;
+ }
+ mutex_unlock(&qri->mutex);
+
+ return in;
+}
+
+void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
+{
+ struct ceph_quotarealm_inode *qri;
+ struct rb_node *node;
+
+ /*
+ * It should now be safe to clean the quotarealms_inodes tree even without
+ * holding mdsc->quotarealms_inodes_mutex; it is taken here anyway.
+ */
+ mutex_lock(&mdsc->quotarealms_inodes_mutex);
+ while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
+ node = rb_first(&mdsc->quotarealms_inodes);
+ qri = rb_entry(node, struct ceph_quotarealm_inode, node);
+ rb_erase(node, &mdsc->quotarealms_inodes);
+ iput(qri->inode);
+ kfree(qri);
+ }
+ mutex_unlock(&mdsc->quotarealms_inodes_mutex);
+}
+
/*
* This function walks through the snaprealm for an inode and returns the
* ceph_snap_realm for the first snaprealm that has quotas set (either max_files
@@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
*
* Note that the caller is responsible for calling ceph_put_snap_realm() on the
* returned realm.
+ *
+ * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
+ * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
+ * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
+ * this function will return -EAGAIN; otherwise, the snaprealms walk-through
+ * will be restarted.
*/
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
- struct inode *inode)
+ struct inode *inode, bool retry)
{
struct ceph_inode_info *ci = NULL;
struct ceph_snap_realm *realm, *next;
@@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
if (ceph_snap(inode) != CEPH_NOSNAP)
return NULL;
+restart:
realm = ceph_inode(inode)->i_snap_realm;
if (realm)
ceph_get_snap_realm(mdsc, realm);
@@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode));
while (realm) {
+ bool has_inode;
+
spin_lock(&realm->inodes_with_caps_lock);
- in = realm->inode ? igrab(realm->inode) : NULL;
+ has_inode = realm->inode;
+ in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock);
- if (!in)
+ if (has_inode && !in)
break;
+ if (!in) {
+ up_read(&mdsc->snap_rwsem);
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
+ down_read(&mdsc->snap_rwsem);
+ if (IS_ERR_OR_NULL(in))
+ break;
+ ceph_put_snap_realm(mdsc, realm);
+ if (!retry)
+ return ERR_PTR(-EAGAIN);
+ goto restart;
+ }
ci = ceph_inode(in);
has_quota = __ceph_has_any_quota(ci);
@@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
struct ceph_snap_realm *old_realm, *new_realm;
bool is_same;
+restart:
+ /*
+ * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
+ * However, get_quota_realm may drop it temporarily. By setting the
+ * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
+ * dropped and we can then restart the whole operation.
+ */
down_read(&mdsc->snap_rwsem);
- old_realm = get_quota_realm(mdsc, old);
- new_realm = get_quota_realm(mdsc, new);
+ old_realm = get_quota_realm(mdsc, old, true);
+ new_realm = get_quota_realm(mdsc, new, false);
+ if (PTR_ERR(new_realm) == -EAGAIN) {
+ up_read(&mdsc->snap_rwsem);
+ if (old_realm)
+ ceph_put_snap_realm(mdsc, old_realm);
+ goto restart;
+ }
is_same = (old_realm == new_realm);
up_read(&mdsc->snap_rwsem);
@@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
return false;
down_read(&mdsc->snap_rwsem);
+restart:
realm = ceph_inode(inode)->i_snap_realm;
if (realm)
ceph_get_snap_realm(mdsc, realm);
@@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode));
while (realm) {
+ bool has_inode;
+
spin_lock(&realm->inodes_with_caps_lock);
- in = realm->inode ? igrab(realm->inode) : NULL;
+ has_inode = realm->inode;
+ in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock);
- if (!in)
+ if (has_inode && !in)
break;
-
+ if (!in) {
+ up_read(&mdsc->snap_rwsem);
+ in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
+ down_read(&mdsc->snap_rwsem);
+ if (IS_ERR_OR_NULL(in))
+ break;
+ ceph_put_snap_realm(mdsc, realm);
+ goto restart;
+ }
ci = ceph_inode(in);
spin_lock(&ci->i_ceph_lock);
if (op == QUOTA_CHECK_MAX_FILES_OP) {
@@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
bool is_updated = false;
down_read(&mdsc->snap_rwsem);
- realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
+ realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
up_read(&mdsc->snap_rwsem);
if (!realm)
return false;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 285edda4fc3b..c864b44c8341 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -845,6 +845,12 @@ static void ceph_umount_begin(struct super_block *sb)
return;
}
+static int ceph_remount(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+ return 0;
+}
+
static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode,
@@ -853,6 +859,7 @@ static const struct super_operations ceph_super_ops = {
.drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
+ .remount_fs = ceph_remount,
.show_options = ceph_show_options,
.statfs = ceph_statfs,
.umount_begin = ceph_umount_begin,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c5b4a05905c0..6edab9a750f8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1083,6 +1083,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* export.c */
extern const struct export_operations ceph_export_ops;
+struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino);
/* locks.c */
extern __init void ceph_flock_init(void);
@@ -1133,5 +1134,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
+extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 7ede7306599f..1e21b2528cfb 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -77,7 +77,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
goto name_is_IP_address;
/* Perform the upcall */
- rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
+ rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL, false);
if (rc < 0)
cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
__func__, len, len, hostname);
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index c5234c21b539..f2bb7985d21c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -39,7 +39,6 @@
#include <linux/device.h>
#include <linux/pid_namespace.h>
#include <asm/io.h>
-#include <linux/poll.h>
#include <linux/uaccess.h>
#include <linux/coda.h>
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 591e82ba443c..5e7932d668ab 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1757,12 +1757,19 @@ int configfs_register_group(struct config_group *parent_group,
inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
- if (!ret) {
- spin_lock(&configfs_dirent_lock);
- configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
- spin_unlock(&configfs_dirent_lock);
- }
+ if (ret)
+ goto err_out;
+
+ spin_lock(&configfs_dirent_lock);
+ configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+ spin_unlock(&configfs_dirent_lock);
+ inode_unlock(d_inode(parent));
+ return 0;
+err_out:
inode_unlock(d_inode(parent));
+ mutex_lock(&subsys->su_mutex);
+ unlink_group(group);
+ mutex_unlock(&subsys->su_mutex);
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
diff --git a/fs/dax.c b/fs/dax.c
index e5e54da1715f..f74386293632 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -814,7 +814,7 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
goto unlock_pmd;
flush_cache_page(vma, address, pfn);
- pmd = pmdp_huge_clear_flush(vma, address, pmdp);
+ pmd = pmdp_invalidate(vma, address, pmdp);
pmd = pmd_wrprotect(pmd);
pmd = pmd_mkclean(pmd);
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
@@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
}
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
- result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
- write);
+ result = vmf_insert_pfn_pmd(vmf, pfn, write);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
@@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
else if (order == PMD_ORDER)
- ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
- pfn, true);
+ ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
else
ret = VM_FAULT_FALLBACK;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd602f73..93b1fa7bb298 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -21,6 +21,9 @@
#include <linux/eventfd.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/idr.h>
+
+static DEFINE_IDA(eventfd_ida);
struct eventfd_ctx {
struct kref kref;
@@ -35,6 +38,7 @@ struct eventfd_ctx {
*/
__u64 count;
unsigned int flags;
+ int id;
};
/**
@@ -69,6 +73,8 @@ EXPORT_SYMBOL_GPL(eventfd_signal);
static void eventfd_free_ctx(struct eventfd_ctx *ctx)
{
+ if (ctx->id >= 0)
+ ida_simple_remove(&eventfd_ida, ctx->id);
kfree(ctx);
}
@@ -297,6 +303,7 @@ static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
seq_printf(m, "eventfd-count: %16llx\n",
(unsigned long long)ctx->count);
spin_unlock_irq(&ctx->wqh.lock);
+ seq_printf(m, "eventfd-id: %d\n", ctx->id);
}
#endif
@@ -400,6 +407,7 @@ static int do_eventfd(unsigned int count, int flags)
init_waitqueue_head(&ctx->wqh);
ctx->count = count;
ctx->flags = flags;
+ ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
diff --git a/fs/exec.c b/fs/exec.c
index 2e0033348d8e..d88584ebf07f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1652,11 +1652,13 @@ int search_binary_handler(struct linux_binprm *bprm)
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
+
bprm->recursion_depth++;
retval = fmt->load_binary(bprm);
+ bprm->recursion_depth--;
+
read_lock(&binfmt_lock);
put_binfmt(fmt);
- bprm->recursion_depth--;
if (retval < 0 && !bprm->mm) {
/* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c27c27300d95..e474127dd255 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -451,7 +451,9 @@ failed_out:
/**
* ext2_alloc_branch - allocate and set up a chain of blocks.
* @inode: owner
- * @num: depth of the chain (number of blocks to allocate)
+ * @indirect_blks: depth of the chain (number of blocks to allocate)
+ * @blks: number of allocated direct blocks
+ * @goal: preferred place for allocation
* @offsets: offsets (in the blocks) to store the pointers to next.
* @branch: place to store the chain in.
*
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 63e599524085..217b290ae3a5 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -285,7 +285,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
/* assert(atomic_read(acl->a_refcount) == 1); */
FOREACH_ACL_ENTRY(pa, acl, pe) {
- switch(pa->e_tag) {
+ switch (pa->e_tag) {
case ACL_USER_OBJ:
pa->e_perm &= (mode >> 6) | ~S_IRWXO;
mode &= (pa->e_perm << 6) | ~S_IRWXU;
@@ -326,7 +326,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
}
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
- return not_equiv;
+ return not_equiv;
}
static int f2fs_acl_create(struct inode *dir, umode_t *mode,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a98e1b02279e..ed70b68b2b38 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -66,7 +66,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
- .is_meta = is_meta,
+ .is_por = !is_meta,
};
int err;
@@ -130,6 +130,30 @@ struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, false);
}
+static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
+ int type)
+{
+ struct seg_entry *se;
+ unsigned int segno, offset;
+ bool exist;
+
+ if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
+ return true;
+
+ segno = GET_SEGNO(sbi, blkaddr);
+ offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
+ se = get_seg_entry(sbi, segno);
+
+ exist = f2fs_test_bit(offset, se->cur_valid_map);
+ if (!exist && type == DATA_GENERIC_ENHANCE) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
+ "blkaddr:%u, sit bitmap:%d", blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
+ }
+ return exist;
+}
+
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
@@ -151,15 +175,22 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
return false;
break;
case META_POR:
+ if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+ blkaddr < MAIN_BLKADDR(sbi)))
+ return false;
+ break;
case DATA_GENERIC:
+ case DATA_GENERIC_ENHANCE:
+ case DATA_GENERIC_ENHANCE_READ:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi))) {
- if (type == DATA_GENERIC) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "access invalid blkaddr:%u", blkaddr);
- WARN_ON(1);
- }
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ WARN_ON(1);
return false;
+ } else {
+ return __is_bitmap_valid(sbi, blkaddr, type);
}
break;
case META_GENERIC:
@@ -189,7 +220,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
.in_list = false,
- .is_meta = (type != META_POR),
+ .is_por = (type == META_POR),
};
struct blk_plug plug;
@@ -644,6 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
+ if (bdev_read_only(sbi->sb->s_bdev)) {
+ f2fs_msg(sbi->sb, KERN_INFO, "write access "
+ "unavailable, skipping orphan cleanup");
+ return 0;
+ }
+
if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
@@ -758,13 +795,27 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
}
}
+static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi,
+ struct f2fs_checkpoint *ckpt)
+{
+ unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset);
+ __u32 chksum;
+
+ chksum = f2fs_crc32(sbi, ckpt, chksum_ofs);
+ if (chksum_ofs < CP_CHKSUM_OFFSET) {
+ chksum_ofs += sizeof(chksum);
+ chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs,
+ F2FS_BLKSIZE - chksum_ofs);
+ }
+ return chksum;
+}
+
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
struct f2fs_checkpoint **cp_block, struct page **cp_page,
unsigned long long *version)
{
- unsigned long blk_size = sbi->blocksize;
size_t crc_offset = 0;
- __u32 crc = 0;
+ __u32 crc;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
if (IS_ERR(*cp_page))
@@ -773,15 +824,27 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
- if (crc_offset > (blk_size - sizeof(__le32))) {
+ if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
+ crc_offset > CP_CHKSUM_OFFSET) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- crc = cur_cp_crc(*cp_block);
- if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
+ if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_put_page(*cp_page, 1);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "layout of large_nat_bitmap is deprecated, "
+ "run fsck to repair, chksum_offset: %zu",
+ crc_offset);
+ return -EINVAL;
+ }
+ }
+
+ crc = f2fs_checkpoint_chksum(sbi, *cp_block);
+ if (crc != cur_cp_crc(*cp_block)) {
f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
@@ -1009,13 +1072,11 @@ retry:
if (inode) {
unsigned long cur_ino = inode->i_ino;
- if (is_dir)
- F2FS_I(inode)->cp_task = current;
+ F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
- if (is_dir)
- F2FS_I(inode)->cp_task = NULL;
+ F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
@@ -1391,7 +1452,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
- crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
+ crc32 = f2fs_checkpoint_chksum(sbi, ckpt);
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@@ -1475,7 +1536,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
__set_cp_next_pack(sbi);
/*
@@ -1500,6 +1565,9 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned long long ckpt_ver;
int err = 0;
+ if (f2fs_readonly(sbi->sb) || f2fs_hw_is_readonly(sbi))
+ return -EROFS;
+
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (cpc->reason != CP_PAUSE)
return 0;
@@ -1516,10 +1584,6 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
err = -EIO;
goto out;
}
- if (f2fs_readonly(sbi->sb)) {
- err = -EROFS;
- goto out;
- }
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 64040e998439..eda4181d2092 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -218,12 +218,14 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
struct block_device *bdev = sbi->sb->s_bdev;
int i;
- for (i = 0; i < sbi->s_ndevs; i++) {
- if (FDEV(i).start_blk <= blk_addr &&
- FDEV(i).end_blk >= blk_addr) {
- blk_addr -= FDEV(i).start_blk;
- bdev = FDEV(i).bdev;
- break;
+ if (f2fs_is_multi_device(sbi)) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (FDEV(i).start_blk <= blk_addr &&
+ FDEV(i).end_blk >= blk_addr) {
+ blk_addr -= FDEV(i).start_blk;
+ bdev = FDEV(i).bdev;
+ break;
+ }
}
}
if (bio) {
@@ -237,6 +239,9 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int i;
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+
for (i = 0; i < sbi->s_ndevs; i++)
if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
return i;
@@ -420,7 +425,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
- __submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
+ __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
@@ -448,7 +453,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
fio->encrypted_page : fio->page;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ fio->is_por ? META_POR : (__is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
return -EFAULT;
trace_f2fs_submit_page_bio(page, fio);
@@ -498,9 +504,7 @@ next:
spin_unlock(&io->io_lock);
}
- if (__is_valid_data_blkaddr(fio->old_blkaddr))
- verify_block_addr(fio, fio->old_blkaddr);
- verify_block_addr(fio, fio->new_blkaddr);
+ verify_fio_blkaddr(fio);
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
@@ -557,9 +561,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
- return ERR_PTR(-EFAULT);
-
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
@@ -587,8 +588,10 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct bio *bio;
+ bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -600,8 +603,8 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
return -EFAULT;
}
ClearPageError(page);
- inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ inc_page_count(sbi, F2FS_RD_DATA);
+ __submit_bio(sbi, bio, DATA);
return 0;
}
@@ -729,6 +732,11 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFAULT;
+ goto put_err;
+ }
goto got_it;
}
@@ -742,6 +750,13 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
err = -ENOENT;
goto put_err;
}
+ if (dn.data_blkaddr != NEW_ADDR &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ err = -EFAULT;
+ goto put_err;
+ }
got_it:
if (PageUptodate(page)) {
unlock_page(page);
@@ -1084,12 +1099,12 @@ next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
err = -EFAULT;
goto sync_out;
}
- if (is_valid_data_blkaddr(sbi, blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr)) {
/* use out-place-update for driect IO under LFS mode */
if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
map->m_may_create) {
@@ -1499,6 +1514,118 @@ out:
return ret;
}
+static int f2fs_read_single_page(struct inode *inode, struct page *page,
+ unsigned nr_pages,
+ struct f2fs_map_blocks *map,
+ struct bio **bio_ret,
+ sector_t *last_block_in_bio,
+ bool is_readahead)
+{
+ struct bio *bio = *bio_ret;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ int ret = 0;
+
+ block_in_file = (sector_t)page->index;
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /* just zeroing out page which is beyond EOF */
+ if (block_in_file >= last_block)
+ goto zero_out;
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map->m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map->m_lblk &&
+ block_in_file < (map->m_lblk + map->m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map->m_lblk = block_in_file;
+ map->m_len = last_block - block_in_file;
+
+ ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
+ if (ret)
+ goto out;
+got_it:
+ if ((map->m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map->m_pblk + block_in_file - map->m_lblk;
+ SetPageMappedToDisk(page);
+
+ if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ } else {
+zero_out:
+ zero_user_segment(page, 0, PAGE_SIZE);
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto out;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (*last_block_in_bio != block_nr - 1 ||
+ !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
+submit_and_realloc:
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
+ if (IS_ERR(bio)) {
+ ret = PTR_ERR(bio);
+ bio = NULL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the page is under writeback, we need to wait for
+ * its completion to see the correct decrypted data.
+ */
+ f2fs_wait_on_block_writeback(inode, block_nr);
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
+ ClearPageError(page);
+ *last_block_in_bio = block_nr;
+ goto out;
+confused:
+ if (bio) {
+ __submit_bio(F2FS_I_SB(inode), bio, DATA);
+ bio = NULL;
+ }
+ unlock_page(page);
+out:
+ *bio_ret = bio;
+ return ret;
+}
+
/*
* This function was originally taken from fs/mpage.c, and customized for f2fs.
* Major change was from block_size == page_size in f2fs by default.
@@ -1515,13 +1642,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
struct inode *inode = mapping->host;
- const unsigned blkbits = inode->i_blkbits;
- const unsigned blocksize = 1 << blkbits;
- sector_t block_in_file;
- sector_t last_block;
- sector_t last_block_in_file;
- sector_t block_nr;
struct f2fs_map_blocks map;
+ int ret = 0;
map.m_pblk = 0;
map.m_lblk = 0;
@@ -1544,98 +1666,13 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
goto next_page;
}
- block_in_file = (sector_t)page->index;
- last_block = block_in_file + nr_pages;
- last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
- blkbits;
- if (last_block > last_block_in_file)
- last_block = last_block_in_file;
-
- /* just zeroing out page which is beyond EOF */
- if (block_in_file >= last_block)
- goto zero_out;
- /*
- * Map blocks using the previous result first.
- */
- if ((map.m_flags & F2FS_MAP_MAPPED) &&
- block_in_file > map.m_lblk &&
- block_in_file < (map.m_lblk + map.m_len))
- goto got_it;
-
- /*
- * Then do more f2fs_map_blocks() calls until we are
- * done with this page.
- */
- map.m_lblk = block_in_file;
- map.m_len = last_block - block_in_file;
-
- if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
- goto set_error_page;
-got_it:
- if ((map.m_flags & F2FS_MAP_MAPPED)) {
- block_nr = map.m_pblk + block_in_file - map.m_lblk;
- SetPageMappedToDisk(page);
-
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
- SetPageUptodate(page);
- goto confused;
- }
-
- if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
- DATA_GENERIC))
- goto set_error_page;
- } else {
-zero_out:
+ ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
+ &last_block_in_bio, is_readahead);
+ if (ret) {
+ SetPageError(page);
zero_user_segment(page, 0, PAGE_SIZE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
unlock_page(page);
- goto next_page;
}
-
- /*
- * This page will go to BIO. Do we need to send this
- * BIO off first?
- */
- if (bio && (last_block_in_bio != block_nr - 1 ||
- !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
-submit_and_realloc:
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0);
- if (IS_ERR(bio)) {
- bio = NULL;
- goto set_error_page;
- }
- }
-
- /*
- * If the page is under writeback, we need to wait for
- * its completion to see the correct decrypted data.
- */
- f2fs_wait_on_block_writeback(inode, block_nr);
-
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
- goto submit_and_realloc;
-
- inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
- ClearPageError(page);
- last_block_in_bio = block_nr;
- goto next_page;
-set_error_page:
- SetPageError(page);
- zero_user_segment(page, 0, PAGE_SIZE);
- unlock_page(page);
- goto next_page;
-confused:
- if (bio) {
- __submit_bio(F2FS_I_SB(inode), bio, DATA);
- bio = NULL;
- }
- unlock_page(page);
next_page:
if (pages)
put_page(page);
@@ -1643,7 +1680,7 @@ next_page:
BUG_ON(pages && !list_empty(pages));
if (bio)
__submit_bio(F2FS_I_SB(inode), bio, DATA);
- return 0;
+ return pages ? 0 : ret;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -1813,7 +1850,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC))
+ DATA_GENERIC_ENHANCE))
return -EFAULT;
ipu_force = true;
@@ -1840,7 +1877,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC)) {
+ DATA_GENERIC_ENHANCE)) {
err = -EFAULT;
goto out_writepage;
}
@@ -1848,7 +1885,8 @@ got_it:
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
+ if (ipu_force ||
+ (__is_valid_data_blkaddr(fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1866,9 +1904,10 @@ got_it:
true);
if (PageWriteback(page))
end_page_writeback(page);
+ } else {
+ set_inode_flag(inode, FI_UPDATE_WRITE);
}
trace_f2fs_do_write_data_page(fio->page, IPU);
- set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
}
@@ -2030,7 +2069,8 @@ out:
}
unlock_page(page);
- if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
+ if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
+ !F2FS_I(inode)->cp_task)
f2fs_balance_fs(sbi, need_balance_fs);
if (unlikely(f2fs_cp_error(sbi))) {
@@ -2491,6 +2531,11 @@ repeat:
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE_READ)) {
+ err = -EFAULT;
+ goto fail;
+ }
err = f2fs_submit_page_read(inode, page, blkaddr);
if (err)
goto fail;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index bacf5c2a8850..06b89a9862ab 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -210,7 +210,14 @@ enum {
META_SSA,
META_MAX,
META_POR,
- DATA_GENERIC,
+ DATA_GENERIC, /* check range only */
+ DATA_GENERIC_ENHANCE, /* strong check on range and segment bitmap */
+ DATA_GENERIC_ENHANCE_READ, /*
+ * strong check on range and segment
+ * bitmap but no warning due to race
+ * condition of read on truncated area
+ * by extent_cache
+ */
META_GENERIC,
};
@@ -1041,7 +1048,7 @@ struct f2fs_io_info {
bool submitted; /* indicate IO submission */
int need_lock; /* indicate we need to lock cp_rwsem */
bool in_list; /* indicate fio is in io_list */
- bool is_meta; /* indicate borrow meta inode mapping or not */
+ bool is_por; /* indicate IO is from recovery or not */
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
@@ -1068,8 +1075,8 @@ struct f2fs_dev_info {
block_t start_blk;
block_t end_blk;
#ifdef CONFIG_BLK_DEV_ZONED
- unsigned int nr_blkz; /* Total number of zones */
- u8 *blkz_type; /* Array of zones type */
+ unsigned int nr_blkz; /* Total number of zones */
+ unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
#endif
};
@@ -1366,6 +1373,17 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
#endif
+/*
+ * Test if the mounted volume is a multi-device volume.
+ * - For a single regular disk volume, sbi->s_ndevs is 0.
+ * - For a single zoned disk volume, sbi->s_ndevs is 1.
+ * - For a multi-device volume, sbi->s_ndevs is always 2 or more.
+ */
+static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
+{
+ return sbi->s_ndevs > 1;
+}
+
/* For write statistics. Suppose sector size is 512 bytes,
* and the return value is in kbytes. s is of struct f2fs_sb_info.
*/
@@ -1777,6 +1795,7 @@ enospc:
return -ENOSPC;
}
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1785,13 +1804,21 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
+ if (unlikely(inode->i_blocks < sectors)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1889,7 +1916,11 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
offset = (flag == SIT_BITMAP) ?
le32_to_cpu(ckpt->nat_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ /*
+ * if large_nat_bitmap feature is enabled, leave checksum
+ * protection for all nat/sit bitmaps.
+ */
+ return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
}
if (__cp_payload(sbi) > 0) {
@@ -2008,7 +2039,6 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, !sbi->total_valid_block_count);
f2fs_bug_on(sbi, !sbi->total_valid_node_count);
- f2fs_bug_on(sbi, !is_inode && !inode->i_blocks);
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
@@ -2018,10 +2048,19 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->stat_lock);
- if (is_inode)
+ if (is_inode) {
dquot_free_inode(inode);
- else
+ } else {
+ if (unlikely(inode->i_blocks == 0)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return;
+ }
f2fs_i_blocks_write(inode, 1, false, true);
+ }
}
static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
@@ -2545,7 +2584,14 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode);
+ unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode);
+ return ALIGN_DOWN(addrs, 1);
+}
+
+static inline unsigned int addrs_per_block(struct inode *inode)
+{
+ return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1);
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -2558,7 +2604,9 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
static inline int inline_xattr_size(struct inode *inode)
{
- return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ if (f2fs_has_inline_xattr(inode))
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
+ return 0;
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2800,12 +2848,10 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
-#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \
- (!is_read_io((fio)->op) || (fio)->is_meta))
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
@@ -2824,15 +2870,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr)
return true;
}
-static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr)
-{
- if (!__is_valid_data_blkaddr(blkaddr))
- return false;
- verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
- return true;
-}
-
static inline void f2fs_set_page_private(struct page *page,
unsigned long data)
{
@@ -3530,16 +3567,12 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline int get_blkz_type(struct f2fs_sb_info *sbi,
- struct block_device *bdev, block_t blkaddr)
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
{
unsigned int zno = blkaddr >> sbi->log_blocks_per_blkz;
- int i;
- for (i = 0; i < sbi->s_ndevs; i++)
- if (FDEV(i).bdev == bdev)
- return FDEV(i).blkz_type[zno];
- return -EINVAL;
+ return test_bit(zno, FDEV(devi).blkz_seq);
}
#endif
@@ -3548,9 +3581,23 @@ static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi)
return f2fs_sb_has_blkzoned(sbi);
}
+static inline bool f2fs_bdev_support_discard(struct block_device *bdev)
+{
+ return blk_queue_discard(bdev_get_queue(bdev)) ||
+ bdev_is_zoned(bdev);
+}
+
static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi)
{
- return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev));
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return f2fs_bdev_support_discard(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (f2fs_bdev_support_discard(FDEV(i).bdev))
+ return true;
+ return false;
}
static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
@@ -3559,6 +3606,20 @@ static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi)
f2fs_hw_should_discard(sbi);
}
+static inline bool f2fs_hw_is_readonly(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ if (!f2fs_is_multi_device(sbi))
+ return bdev_read_only(sbi->sb->s_bdev);
+
+ for (i = 0; i < sbi->s_ndevs; i++)
+ if (bdev_read_only(FDEV(i).bdev))
+ return true;
+ return false;
+}
+
+
static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
{
clear_opt(sbi, ADAPTIVE);
@@ -3614,7 +3675,7 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
if (f2fs_post_read_required(inode))
return true;
- if (sbi->s_ndevs)
+ if (f2fs_is_multi_device(sbi))
return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so
@@ -3651,4 +3712,4 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
return false;
}
-#endif
+#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5742ab8b57dc..45b45f37d347 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -39,6 +39,8 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
ret = filemap_fault(vmf);
up_read(&F2FS_I(inode)->i_mmap_sem);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
+
return ret;
}
@@ -356,7 +358,7 @@ static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_data_blkaddr(sbi, blkaddr))
+ __is_valid_data_blkaddr(blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -422,7 +424,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
- blkaddr, DATA_GENERIC)) {
+ blkaddr, DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
goto fail;
}
@@ -523,7 +525,8 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn);
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ !f2fs_is_valid_blkaddr(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
continue;
f2fs_invalidate_blocks(sbi, blkaddr);
@@ -552,7 +555,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
- f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
+ f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
@@ -1006,7 +1009,8 @@ next_dnode:
} else if (ret == -ENOENT) {
if (dn.max_level == 0)
return -ENOENT;
- done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
+ done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node,
+ len);
blkaddr += done;
do_replace += done;
goto next;
@@ -1017,6 +1021,14 @@ next_dnode:
for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
*blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
+
+ if (__is_valid_data_blkaddr(*blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, *blkaddr,
+ DATA_GENERIC_ENHANCE)) {
+ f2fs_put_dnode(&dn);
+ return -EFAULT;
+ }
+
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
if (test_opt(sbi, LFS)) {
@@ -1157,7 +1169,7 @@ static int __exchange_data_block(struct inode *src_inode,
int ret;
while (len) {
- olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
+ olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
array_size(olen, sizeof(block_t)),
@@ -2573,10 +2585,10 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
sizeof(range)))
return -EFAULT;
- if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
+ if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1\n",
+ "Can't flush %u in %d for segs_per_sec %u != 1",
range.dev_num, sbi->s_ndevs,
sbi->segs_per_sec);
return -EINVAL;
@@ -2858,7 +2870,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials\n",
+ "%s: Enable GC = ino %lx after %x GC trials",
__func__, inode->i_ino,
fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
@@ -3035,15 +3047,21 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(file);
ssize_t ret;
- if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
- return -EIO;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ ret = -EIO;
+ goto out;
+ }
- if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
- return -EINVAL;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
- return -EAGAIN;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
inode_lock(inode);
}
@@ -3056,19 +3074,16 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
- if ((iocb->ki_flags & IOCB_NOWAIT) &&
- (iocb->ki_flags & IOCB_DIRECT)) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ if ((iocb->ki_flags & IOCB_NOWAIT)) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode,
- iocb, from)) {
- clear_inode_flag(inode,
- FI_NO_PREALLOC);
- inode_unlock(inode);
- return -EAGAIN;
- }
-
+ f2fs_has_inline_data(inode) ||
+ f2fs_force_buffered_io(inode, iocb, from)) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
+ inode_unlock(inode);
+ ret = -EAGAIN;
+ goto out;
+ }
} else {
preallocated = true;
target_size = iocb->ki_pos + iov_iter_count(from);
@@ -3077,7 +3092,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err) {
clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
- return err;
+ ret = err;
+ goto out;
}
}
ret = __generic_file_write_iter(iocb, from);
@@ -3091,7 +3107,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
inode_unlock(inode);
-
+out:
+ trace_f2fs_file_write_iter(inode, iocb->ki_pos,
+ iov_iter_count(from), ret);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 195cf0f9d9ef..963fb4571fd9 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -591,7 +591,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
+ return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
}
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -656,6 +656,11 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC_ENHANCE_READ))) {
+ err = -EFAULT;
+ goto put_page;
+ }
goto got_it;
}
@@ -665,8 +670,12 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
goto put_page;
f2fs_put_dnode(&dn);
+ if (!__is_valid_data_blkaddr(dn.data_blkaddr)) {
+ err = -ENOENT;
+ goto put_page;
+ }
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
- DATA_GENERIC))) {
+ DATA_GENERIC_ENHANCE))) {
err = -EFAULT;
goto put_page;
}
@@ -1175,6 +1184,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_stop_checkpoint(sbi, false);
goto skip;
}
@@ -1346,7 +1356,7 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES;
/* give warm/cold data area from slower device */
- if (sbi->s_ndevs && !__is_large_section(sbi))
+ if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bb6a152310ef..404d2462a0fe 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -420,6 +420,14 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
f2fs_i_depth_write(dir, 1);
if (i_size_read(dir) < PAGE_SIZE)
f2fs_i_size_write(dir, PAGE_SIZE);
@@ -501,6 +509,15 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
+
+ /*
+ * should retrieve reserved space which was used to keep
+ * inline_dentry's structure for backward compatibility.
+ */
+ if (!f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(dir)) &&
+ !f2fs_has_inline_xattr(dir))
+ F2FS_I(dir)->i_inline_xattr_size = 0;
+
kvfree(backup_dentry);
return 0;
recover:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e7f2e8759315..ccb02226dd2c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -73,7 +73,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi,
if (!__is_valid_data_blkaddr(addr))
return 1;
- if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
return -EFAULT;
return 0;
}
@@ -177,8 +177,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
if (provided != calculated)
f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, ino = %x, %x vs. %x",
- ino_of_node(page), provided, calculated);
+ "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -267,9 +267,10 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
- (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk,
+ DATA_GENERIC_ENHANCE) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
- DATA_GENERIC))) {
+ DATA_GENERIC_ENHANCE))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) extent info [%u, %u, %u] "
@@ -488,6 +489,7 @@ make_now:
return inode;
bad_inode:
+ f2fs_inode_synced(inode);
iget_failed(inode);
trace_f2fs_iget_exit(inode, ret);
return ERR_PTR(ret);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c3e8a901d47a..0f77f9242751 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -143,7 +143,7 @@ fail_drop:
return ERR_PTR(err);
}
-static int is_extension_exist(const unsigned char *s, const char *sub)
+static inline int is_extension_exist(const unsigned char *s, const char *sub)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d6e48a6487d5..18a038a2a9fa 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -454,7 +454,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -465,7 +465,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_data_blkaddr(sbi, new_blkaddr))
+ if (!__is_valid_data_blkaddr(new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -526,6 +526,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry ne;
struct nat_entry *e;
pgoff_t index;
+ block_t blkaddr;
int i;
ni->nid = nid;
@@ -569,6 +570,11 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ blkaddr = le32_to_cpu(ne.block_addr);
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
+ return -EFAULT;
+
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
return 0;
@@ -600,9 +606,9 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n)
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
const long direct_index = ADDRS_PER_INODE(dn->inode);
- const long direct_blks = ADDRS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
- unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
+ const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
int cur_level = dn->cur_level;
int max_level = dn->max_level;
pgoff_t base = 0;
@@ -638,9 +644,9 @@ static int get_node_path(struct inode *inode, long block,
int offset[4], unsigned int noffset[4])
{
const long direct_index = ADDRS_PER_INODE(inode);
- const long direct_blks = ADDRS_PER_BLOCK;
+ const long direct_blks = ADDRS_PER_BLOCK(inode);
const long dptrs_per_blk = NIDS_PER_BLOCK;
- const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
int n = 0;
int level = 0;
@@ -1181,8 +1187,14 @@ int f2fs_remove_inode_page(struct inode *inode)
f2fs_put_dnode(&dn);
return -EIO;
}
- f2fs_bug_on(F2FS_I_SB(inode),
- inode->i_blocks != 0 && inode->i_blocks != 8);
+
+ if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
+ f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
+ "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ }
/* will put inode & node pages */
err = truncate_node(&dn);
@@ -1277,9 +1289,10 @@ static int read_node_page(struct page *page, int op_flags)
int err;
if (PageUptodate(page)) {
-#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
-#endif
+ if (!f2fs_inode_chksum_verify(sbi, page)) {
+ ClearPageUptodate(page);
+ return -EBADMSG;
+ }
return LOCKED_PAGE;
}
@@ -1543,7 +1556,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
}
if (__is_valid_data_blkaddr(ni.blk_addr) &&
- !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
+ DATA_GENERIC_ENHANCE)) {
up_read(&sbi->node_write);
goto redirty_out;
}
@@ -2078,6 +2092,9 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
if (unlikely(nid == 0))
return false;
+ if (unlikely(f2fs_check_nid_range(sbi, nid)))
+ return false;
+
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = FREE_NID;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e3883db868d8..e04f82b3f4fc 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -325,8 +325,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
break;
}
- if (!is_recoverable_dnode(page))
+ if (!is_recoverable_dnode(page)) {
+ f2fs_put_page(page, 1);
break;
+ }
if (!is_fsync_dnode(page))
goto next;
@@ -338,8 +340,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = f2fs_recover_inode_page(sbi, page);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
quota_inode = true;
}
@@ -355,6 +359,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
err = 0;
goto next;
}
+ f2fs_put_page(page, 1);
break;
}
}
@@ -370,6 +375,7 @@ next:
"%s: detect looped node chain, "
"blkaddr:%u, next:%u",
__func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_put_page(page, 1);
err = -EINVAL;
break;
}
@@ -380,7 +386,6 @@ next:
f2fs_ra_meta_pages_cond(sbi, blkaddr);
}
- f2fs_put_page(page, 1);
return err;
}
@@ -546,7 +551,15 @@ retry_dn:
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
- f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
+
+ if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFAULT;
+ goto err;
+ }
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
@@ -554,6 +567,18 @@ retry_dn:
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
+ if (__is_valid_data_blkaddr(src) &&
+ !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (__is_valid_data_blkaddr(dest) &&
+ !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
+ err = -EFAULT;
+ goto err;
+ }
+
/* skip recovering if dest is the same as src */
if (src == dest)
continue;
@@ -666,8 +691,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
*/
if (IS_INODE(page)) {
err = recover_inode(entry->inode, page);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
break;
+ }
}
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page, dir_list);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index aa7fe79b62b2..8dee063c833f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -580,7 +580,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
int ret = 0;
int i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return __submit_flush_wait(sbi, sbi->sb->s_bdev);
for (i = 0; i < sbi->s_ndevs; i++) {
@@ -648,7 +648,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
return ret;
}
- if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
+ if (atomic_inc_return(&fcc->queued_flush) == 1 ||
+ f2fs_is_multi_device(sbi)) {
ret = submit_flush_wait(sbi, ino);
atomic_dec(&fcc->queued_flush);
@@ -754,7 +755,7 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
int ret = 0, i;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return 0;
for (i = 1; i < sbi->s_ndevs; i++) {
@@ -1367,9 +1368,12 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
{
block_t lblkstart = blkstart;
+ if (!f2fs_bdev_support_discard(bdev))
+ return 0;
+
trace_f2fs_queue_discard(bdev, blkstart, blklen);
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
int devi = f2fs_target_device_index(sbi, blkstart);
blkstart -= FDEV(devi).start_blk;
@@ -1732,42 +1736,36 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
block_t lblkstart = blkstart;
int devi = 0;
- if (sbi->s_ndevs) {
+ if (f2fs_is_multi_device(sbi)) {
devi = f2fs_target_device_index(sbi, blkstart);
+ if (blkstart < FDEV(devi).start_blk ||
+ blkstart > FDEV(devi).end_blk) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
+ blkstart);
+ return -EIO;
+ }
blkstart -= FDEV(devi).start_blk;
}
- /*
- * We need to know the type of the zone: for conventional zones,
- * use regular discard if the drive supports it. For sequential
- * zones, reset the zone write pointer.
- */
- switch (get_blkz_type(sbi, bdev, blkstart)) {
-
- case BLK_ZONE_TYPE_CONVENTIONAL:
- if (!blk_queue_discard(bdev_get_queue(bdev)))
- return 0;
- return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
- case BLK_ZONE_TYPE_SEQWRITE_REQ:
- case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ /* For sequential zones, reset the zone write pointer */
+ if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
sector = SECTOR_FROM_BLOCK(blkstart);
nr_sects = SECTOR_FROM_BLOCK(blklen);
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "(%d) %s: Unaligned discard attempted (block %x + %x)",
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
devi, sbi->s_ndevs ? FDEV(devi).path: "",
blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector,
- nr_sects, GFP_NOFS);
- default:
- /* Unknown zone type: broken device ? */
- return -EIO;
+ return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
}
+
+ /* For conventional zones, use regular discard if supported */
+ return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
}
#endif
@@ -1775,8 +1773,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi) &&
- bdev_zoned_model(bdev) != BLK_ZONED_NONE)
+ if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
@@ -2172,8 +2169,11 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
* before, we must track that to know how much space we
* really have.
*/
- if (f2fs_test_bit(offset, se->ckpt_valid_map))
+ if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count++;
+ spin_unlock(&sbi->stat_lock);
+ }
}
if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2220,7 +2220,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -3089,7 +3089,7 @@ static void update_device_state(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
- if (!sbi->s_ndevs)
+ if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
@@ -3187,13 +3187,18 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
int err;
struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int segno;
fio->new_blkaddr = fio->old_blkaddr;
/* i/o temperature is needed for passing down write hints */
__get_segment_type(fio);
- f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
- GET_SEGNO(sbi, fio->new_blkaddr))->type));
+ segno = GET_SEGNO(sbi, fio->new_blkaddr);
+
+ if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ return -EFAULT;
+ }
stat_inc_inplace_blocks(fio->sbi);
@@ -3336,7 +3341,7 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
if (!f2fs_post_read_required(inode))
return;
- if (!is_valid_data_blkaddr(sbi, blkaddr))
+ if (!__is_valid_data_blkaddr(blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 5c7ed0442d6e..429007b8036e 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -82,7 +82,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
+ ((!__is_valid_data_blkaddr(blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -656,14 +656,15 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
-static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
+static inline void verify_fio_blkaddr(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (__is_meta_io(fio))
- verify_blkaddr(sbi, blk_addr, META_GENERIC);
- else
- verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
+ verify_blkaddr(sbi, fio->old_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC);
+ verify_blkaddr(sbi, fio->new_blkaddr, __is_meta_io(fio) ?
+ META_GENERIC : DATA_GENERIC_ENHANCE);
}
/*
@@ -672,7 +673,6 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
static inline int check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
-#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
@@ -699,7 +699,7 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
set_sbi_flag(sbi, SBI_NEED_FSCK);
return -EINVAL;
}
-#endif
+
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4c55d2ea9df3..6b959bbb336a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1019,7 +1019,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
- kvfree(FDEV(i).blkz_type);
+ kvfree(FDEV(i).blkz_seq);
#endif
}
kvfree(sbi->devs);
@@ -1221,10 +1221,13 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
+
+ spin_lock(&sbi->stat_lock);
if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
buf->f_bfree = 0;
else
buf->f_bfree -= sbi->unusable_block_count;
+ spin_unlock(&sbi->stat_lock);
if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
buf->f_bavail = buf->f_bfree -
@@ -1499,9 +1502,15 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
mutex_lock(&sbi->gc_mutex);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
- f2fs_write_checkpoint(sbi, &cpc);
+ err = f2fs_write_checkpoint(sbi, &cpc);
+ if (err)
+ goto out_unlock;
+ spin_lock(&sbi->stat_lock);
sbi->unusable_block_count = 0;
+ spin_unlock(&sbi->stat_lock);
+
+out_unlock:
mutex_unlock(&sbi->gc_mutex);
restore_flag:
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2271,7 +2280,7 @@ static const struct export_operations f2fs_export_ops = {
static loff_t max_file_blocks(void)
{
loff_t result = 0;
- loff_t leaf_count = ADDRS_PER_BLOCK;
+ loff_t leaf_count = DEF_ADDRS_PER_BLOCK;
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
@@ -2449,7 +2458,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB\n",
+ "Invalid page_cache_size (%lu), supports only 4KB",
PAGE_SIZE);
return 1;
}
@@ -2458,7 +2467,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB\n",
+ "Invalid blocksize (%u), supports only 4KB",
blocksize);
return 1;
}
@@ -2466,7 +2475,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)\n",
+ "Invalid log blocks per segment (%u)",
le32_to_cpu(raw_super->log_blocks_per_seg));
return 1;
}
@@ -2587,7 +2596,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int log_blocks_per_seg;
unsigned int segment_count_main;
unsigned int cp_pack_start_sum, cp_payload;
- block_t user_block_count;
+ block_t user_block_count, valid_user_blocks;
+ block_t avail_node_count, valid_node_count;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -2622,6 +2632,24 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
+ if (valid_user_blocks > user_block_count) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
+ return 1;
+ }
+
+ valid_node_count = le32_to_cpu(ckpt->valid_node_count);
+ avail_node_count = sbi->total_node_count - sbi->nquota_files -
+ F2FS_RESERVED_NODE_NUM;
+ if (valid_node_count > avail_node_count) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2793,9 +2821,11 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
- FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz,
- GFP_KERNEL);
- if (!FDEV(devi).blkz_type)
+ FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
+ BITS_TO_LONGS(FDEV(devi).nr_blkz)
+ * sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!FDEV(devi).blkz_seq)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
@@ -2822,7 +2852,8 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
}
for (i = 0; i < nr_zones; i++) {
- FDEV(devi).blkz_type[n] = zones[i].type;
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(n, FDEV(devi).blkz_seq);
sector += zones[i].len;
n++;
}
@@ -3105,7 +3136,7 @@ try_onemore:
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled\n");
+ "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -3350,10 +3381,17 @@ try_onemore:
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
*/
- if (bdev_read_only(sb->s_bdev) &&
- !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
- err = -EROFS;
- goto free_meta;
+ if (f2fs_hw_is_readonly(sbi)) {
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ err = -EROFS;
+ f2fs_msg(sb, KERN_ERR,
+ "Need to recover fsync data, but "
+ "write access unavailable");
+ goto free_meta;
+ }
+ f2fs_msg(sbi->sb, KERN_INFO, "write access "
+ "unavailable, skipping recovery");
+ goto reset_checkpoint;
}
if (need_fsck)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 848a785abe25..e791741d193b 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -202,12 +202,17 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
return handler;
}
-static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
+ void *last_base_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
+ if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
+ return NULL;
+
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
@@ -297,20 +302,22 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr, int *base_size)
{
- void *cur_addr, *txattr_addr, *last_addr = NULL;
+ void *cur_addr, *txattr_addr, *last_txattr_addr;
+ void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
- unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
- if (!size && !inline_size)
+ if (!xnid && !inline_size)
return -ENODATA;
- *base_size = inline_size + size + XATTR_PADDING_SIZE;
+ *base_size = XATTR_SIZE(xnid, inode) + XATTR_PADDING_SIZE;
txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), *base_size, GFP_NOFS);
if (!txattr_addr)
return -ENOMEM;
+ last_txattr_addr = (void *)txattr_addr + XATTR_SIZE(xnid, inode);
+
/* read from inline xattr */
if (inline_size) {
err = read_inline_xattr(inode, ipage, txattr_addr);
@@ -337,7 +344,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
- *xe = __find_xattr(cur_addr, index, len, name);
+ *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
+ if (!*xe) {
+ err = -EFAULT;
+ goto out;
+ }
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
@@ -581,7 +592,8 @@ static int __f2fs_setxattr(struct inode *inode, int index,
struct page *ipage, int flags)
{
struct f2fs_xattr_entry *here, *last;
- void *base_addr;
+ void *base_addr, *last_base_addr;
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int found, newsize;
size_t len;
__u32 new_hsize;
@@ -605,8 +617,14 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (error)
return error;
+ last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
+
/* find entry with wanted name. */
- here = __find_xattr(base_addr, index, len, name);
+ here = __find_xattr(base_addr, last_base_addr, index, len, name);
+ if (!here) {
+ error = -EFAULT;
+ goto exit;
+ }
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 9172ee082ca8..a90920e2f949 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -71,6 +71,8 @@ struct f2fs_xattr_entry {
entry = XATTR_NEXT_ENTRY(entry))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
+#define XATTR_SIZE(x,i) (((x) ? VALID_XATTR_BLOCK_SIZE : 0) + \
+ (inline_xattr_size(i)))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b3bed32946b1..0e3ed79fcc3f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -193,12 +193,17 @@ static int fat_file_release(struct inode *inode, struct file *filp)
int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
- int res, err;
+ int err;
+
+ err = __generic_file_fsync(filp, start, end, datasync);
+ if (err)
+ return err;
- res = generic_file_fsync(filp, start, end, datasync);
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+ if (err)
+ return err;
- return res ? res : err;
+ return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
}
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 3bb9c0c8cbcc..c2891e933ef1 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -92,7 +92,7 @@ static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
{
int fd;
- fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
+ fd = anon_inode_getfd("[fscontext]", &fscontext_fops, fc,
O_RDWR | o_flags);
if (fd < 0)
put_fs_context(fc);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index fe80bea4ad89..14ce1e47f980 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/fs_context.h>
#define FUSE_CTL_SUPER_MAGIC 0x65735543
@@ -317,7 +318,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
drop_nlink(d_inode(fuse_control_sb->s_root));
}
-static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
+static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
{
static const struct tree_descr empty_descr = {""};
struct fuse_conn *fc;
@@ -343,10 +344,19 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
return 0;
}
-static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
+static int fuse_ctl_get_tree(struct fs_context *fc)
{
- return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
+ return vfs_get_super(fc, vfs_get_single_super, fuse_ctl_fill_super);
+}
+
+static const struct fs_context_operations fuse_ctl_context_ops = {
+ .get_tree = fuse_ctl_get_tree,
+};
+
+static int fuse_ctl_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &fuse_ctl_context_ops;
+ return 0;
}
static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -365,7 +375,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
static struct file_system_type fuse_ctl_fs_type = {
.owner = THIS_MODULE,
.name = "fusectl",
- .mount = fuse_ctl_mount,
+ .init_fs_context = fuse_ctl_init_fs_context,
.kill_sb = fuse_ctl_kill_sb,
};
MODULE_ALIAS_FS("fusectl");
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 55a26f351467..4b41df1d4642 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -33,6 +33,8 @@
* closed.
*/
+#define pr_fmt(fmt) "CUSE: " fmt
+
#include <linux/fuse.h>
#include <linux/cdev.h>
#include <linux/device.h>
@@ -225,7 +227,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
return 0;
if (end[-1] != '\0') {
- printk(KERN_ERR "CUSE: info not properly terminated\n");
+ pr_err("info not properly terminated\n");
return -EINVAL;
}
@@ -242,7 +244,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
key = strstrip(key);
if (!strlen(key)) {
- printk(KERN_ERR "CUSE: zero length info key specified\n");
+ pr_err("zero length info key specified\n");
return -EINVAL;
}
@@ -282,12 +284,11 @@ static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
if (strcmp(key, "DEVNAME") == 0)
devinfo->name = val;
else
- printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
- key);
+ pr_warn("unknown device info \"%s\"\n", key);
}
if (!devinfo->name || !strlen(devinfo->name)) {
- printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+ pr_err("DEVNAME unspecified\n");
return -EINVAL;
}
@@ -341,7 +342,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
else
rc = register_chrdev_region(devt, 1, devinfo.name);
if (rc) {
- printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+ pr_err("failed to register chrdev region\n");
goto err;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9971a35cf1ef..24ea19cfe07e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -906,8 +906,8 @@ static int fuse_check_page(struct page *page)
1 << PG_lru |
1 << PG_active |
1 << PG_reclaim))) {
- printk(KERN_WARNING "fuse: trying to steal weird page\n");
- printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+ pr_warn("trying to steal weird page\n");
+ pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
return 1;
}
return 0;
@@ -1317,6 +1317,16 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
unsigned reqsize;
unsigned int hash;
+ /*
+ * Require sane minimum read buffer - that has capacity for fixed part
+ * of any request header + negotiated max_write room for data. If the
+ * requirement is not satisfied return EINVAL to the filesystem server
+ * to indicate that it is not following FUSE server/client contract.
+ * Don't dequeue / abort any request.
+ */
+ if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, 4096 + fc->max_write))
+ return -EINVAL;
+
restart:
spin_lock(&fiq->waitq.lock);
err = -EAGAIN;
@@ -1749,7 +1759,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
- num = outarg->size;
+ num = min(outarg->size, fc->max_write);
if (outarg->offset > file_size)
num = 0;
else if (outarg->offset + num > file_size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06096b60f1df..3959f08279e6 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -178,7 +178,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
- if (ff->open_flags & FOPEN_NONSEEKABLE)
+ if (ff->open_flags & FOPEN_STREAM)
+ stream_open(inode, file);
+ else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -462,7 +464,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
- inarg.fsync_flags = datasync ? 1 : 0;
+ inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
args.in.h.opcode = opcode;
args.in.h.nodeid = get_node_id(inode);
args.in.numargs = 1;
@@ -1586,7 +1588,7 @@ __acquires(fi->lock)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- size_t crop = i_size_read(inode);
+ loff_t crop = i_size_read(inode);
struct fuse_req *req;
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
@@ -2576,8 +2578,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
#if BITS_PER_LONG == 32
inarg.flags |= FUSE_IOCTL_32BIT;
#else
- if (flags & FUSE_IOCTL_COMPAT)
+ if (flags & FUSE_IOCTL_COMPAT) {
inarg.flags |= FUSE_IOCTL_32BIT;
+#ifdef CONFIG_X86_X32
+ if (in_x32_syscall())
+ inarg.flags |= FUSE_IOCTL_COMPAT_X32;
+#endif
+ }
#endif
/* assume all the iovs returned by client always fits in a page */
@@ -3044,6 +3051,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
}
}
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + length > i_size_read(inode)) {
+ err = inode_newsize_ok(inode, offset + length);
+ if (err)
+ return err;
+ }
+
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0920c0c032a0..24dbca777775 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -9,6 +9,10 @@
#ifndef _FS_FUSE_I_H
#define _FS_FUSE_I_H
+#ifndef pr_fmt
+# define pr_fmt(fmt) "fuse: " fmt
+#endif
+
#include <linux/fuse.h>
#include <linux/fs.h>
#include <linux/mount.h>
@@ -690,6 +694,9 @@ struct fuse_conn {
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
+ /** Filesystem is fully responsible for page cache invalidation. */
+ unsigned explicit_inval_data:1;
+
/** Does the filesystem support readdirplus? */
unsigned do_readdirplus:1;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f485d09d14df..4bb885b0f032 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -81,14 +81,12 @@ struct fuse_forget_link *fuse_alloc_forget(void)
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
- struct inode *inode;
struct fuse_inode *fi;
- inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
- if (!inode)
+ fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
+ if (!fi)
return NULL;
- fi = get_fuse_inode(inode);
fi->i_time = 0;
fi->inval_mask = 0;
fi->nodeid = 0;
@@ -100,11 +98,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
- kmem_cache_free(fuse_inode_cachep, inode);
+ kmem_cache_free(fuse_inode_cachep, fi);
return NULL;
}
- return inode;
+ return &fi->inode;
}
static void fuse_free_inode(struct inode *inode)
@@ -233,7 +231,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
if (oldsize != attr->size) {
truncate_pagecache(inode, attr->size);
- inval = true;
+ if (!fc->explicit_inval_data)
+ inval = true;
} else if (fc->auto_inval_data) {
struct timespec64 new_mtime = {
.tv_sec = attr->mtime,
@@ -908,6 +907,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->dont_mask = 1;
if (arg->flags & FUSE_AUTO_INVAL_DATA)
fc->auto_inval_data = 1;
+ else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
+ fc->explicit_inval_data = 1;
if (arg->flags & FUSE_DO_READDIRPLUS) {
fc->do_readdirplus = 1;
if (arg->flags & FUSE_READDIRPLUS_AUTO)
@@ -969,7 +970,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
- FUSE_NO_OPENDIR_SUPPORT;
+ FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1393,8 +1394,8 @@ static int __init fuse_init(void)
{
int res;
- printk(KERN_INFO "fuse init (API version %i.%i)\n",
- FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+ pr_info("init (API version %i.%i)\n",
+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
INIT_LIST_HEAD(&fuse_conn_list);
res = fuse_fs_init();
@@ -1430,7 +1431,7 @@ static int __init fuse_init(void)
static void __exit fuse_exit(void)
{
- printk(KERN_DEBUG "fuse exit\n");
+ pr_debug("exit\n");
fuse_ctl_cleanup();
fuse_sysfs_cleanup();
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 1787d295834e..08e4996adc23 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -650,7 +650,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
char ro[20];
char spectator[20];
char *envp[] = { ro, spectator, NULL };
- int sysfs_frees_sdp = 0;
sprintf(ro, "RDONLY=%d", sb_rdonly(sb));
sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -661,8 +660,6 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
if (error)
goto fail_reg;
- sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
- function gfs2_sbd_release. */
error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
if (error)
goto fail_reg;
@@ -687,10 +684,7 @@ fail_tune:
fail_reg:
free_percpu(sdp->sd_lkstats);
fs_err(sdp, "error %d adding sysfs files\n", error);
- if (sysfs_frees_sdp)
- kobject_put(&sdp->sd_kobj);
- else
- kfree(sdp);
+ kobject_put(&sdp->sd_kobj);
sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 33b8423ef0c9..f4295aa19350 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -87,7 +87,7 @@ extern int do_mkdir(const char *file, int mode);
extern int hostfs_do_rmdir(const char *file);
extern int do_mknod(const char *file, int mode, unsigned int major,
unsigned int minor);
-extern int link_file(const char *from, const char *to);
+extern int link_file(const char *to, const char *from);
extern int hostfs_do_readlink(char *file, char *buf, int size);
extern int rename_file(char *from, char *to);
extern int rename2_file(char *from, char *to, unsigned int flags);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c74ef4426282..1dcc57189382 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -440,9 +440,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
u32 hash;
index = page->index;
- hash = hugetlb_fault_mutex_hash(h, current->mm,
- &pseudo_vma,
- mapping, index, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index, 0);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
@@ -499,8 +497,15 @@ static void hugetlbfs_evict_inode(struct inode *inode)
struct resv_map *resv_map;
remove_inode_hugepages(inode, 0, LLONG_MAX);
- resv_map = (struct resv_map *)inode->i_mapping->private_data;
- /* root inode doesn't have the resv_map, so we should check it */
+
+ /*
+ * Get the resv_map from the address space embedded in the inode.
+ * This is the address space which points to any resv_map allocated
+ * at inode creation time. If this is a device special inode,
+ * i_mapping may not point to the original address space.
+ */
+ resv_map = (struct resv_map *)(&inode->i_data)->private_data;
+ /* Only regular and link inodes have associated reserve maps */
if (resv_map)
resv_map_release(&resv_map->refs);
clear_inode(inode);
@@ -639,8 +644,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
- index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 48ea3977012a..310f8d17c53e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,7 +231,6 @@ struct io_ring_ctx {
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
- unsigned sqo_stop;
struct {
/* CQ ring */
@@ -329,9 +328,8 @@ struct io_kiocb {
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
-#define REQ_F_PREPPED 16 /* prep already done */
-#define REQ_F_IO_DRAIN 32 /* drain existing IO first */
-#define REQ_F_IO_DRAINED 64 /* drain done */
+#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
+#define REQ_F_IO_DRAINED 32 /* drain done */
u64 user_data;
u32 error; /* iopoll result from callback */
u32 sequence;
@@ -490,7 +488,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
}
static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
- long res, unsigned ev_flags)
+ long res)
{
struct io_uring_cqe *cqe;
@@ -503,7 +501,7 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
if (cqe) {
WRITE_ONCE(cqe->user_data, ki_user_data);
WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, ev_flags);
+ WRITE_ONCE(cqe->flags, 0);
} else {
unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
@@ -522,12 +520,12 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
}
static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
- long res, unsigned ev_flags)
+ long res)
{
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(ctx, user_data, res, ev_flags);
+ io_cqring_fill_event(ctx, user_data, res);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
@@ -629,7 +627,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+ io_cqring_fill_event(ctx, req->user_data, req->error);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
@@ -777,7 +775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
kiocb_end_write(kiocb);
- io_cqring_add_event(req->ctx, req->user_data, res, 0);
+ io_cqring_add_event(req->ctx, req->user_data, res);
io_put_req(req);
}
@@ -896,9 +894,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
if (!req->file)
return -EBADF;
- /* For -EAGAIN retry, everything is already prepped */
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (force_nonblock && !io_file_supports_async(req->file))
force_nonblock = false;
@@ -941,7 +936,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -1216,7 +1210,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- io_cqring_add_event(ctx, user_data, err, 0);
+ io_cqring_add_event(ctx, user_data, err);
io_put_req(req);
return 0;
}
@@ -1227,16 +1221,12 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!req->file)
return -EBADF;
- /* Prep already done (EAGAIN retry) */
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -1265,7 +1255,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
end > 0 ? end : LLONG_MAX,
fsync_flags & IORING_FSYNC_DATASYNC);
- io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+ io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
}
@@ -1277,16 +1267,12 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!req->file)
return -EBADF;
- /* Prep already done (EAGAIN retry) */
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
return ret;
}
@@ -1313,7 +1299,7 @@ static int io_sync_file_range(struct io_kiocb *req,
ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
- io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+ io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
}
@@ -1371,7 +1357,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
spin_unlock_irq(&ctx->completion_lock);
- io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+ io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
}
@@ -1380,7 +1366,7 @@ static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
__poll_t mask)
{
req->poll.done = true;
- io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0);
+ io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
io_commit_cqring(ctx);
}
@@ -1700,7 +1686,7 @@ restart:
io_put_req(req);
if (ret) {
- io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+ io_cqring_add_event(ctx, sqe->user_data, ret);
io_put_req(req);
}
@@ -2005,7 +1991,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
continue;
}
- io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+ io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
}
if (statep)
@@ -2028,7 +2014,7 @@ static int io_sq_thread(void *data)
set_fs(USER_DS);
timeout = inflight = 0;
- while (!kthread_should_stop() && !ctx->sqo_stop) {
+ while (!kthread_should_park()) {
bool all_fixed, mm_fault = false;
int i;
@@ -2090,7 +2076,7 @@ static int io_sq_thread(void *data)
smp_mb();
if (!io_get_sqring(ctx, &sqes[0])) {
- if (kthread_should_stop()) {
+ if (kthread_should_park()) {
finish_wait(&ctx->sqo_wait, &wait);
break;
}
@@ -2140,8 +2126,7 @@ static int io_sq_thread(void *data)
mmput(cur_mm);
}
- if (kthread_should_park())
- kthread_parkme();
+ kthread_parkme();
return 0;
}
@@ -2170,7 +2155,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
ret = io_submit_sqe(ctx, &s, statep);
if (ret)
- io_cqring_add_event(ctx, s.sqe->user_data, ret, 0);
+ io_cqring_add_event(ctx, s.sqe->user_data, ret);
}
io_commit_sqring(ctx);
@@ -2182,6 +2167,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
static unsigned io_cqring_events(struct io_cq_ring *ring)
{
+ /* See comment at the top of this file */
+ smp_rmb();
return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
}
@@ -2194,11 +2181,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
{
struct io_cq_ring *ring = ctx->cq_ring;
sigset_t ksigmask, sigsaved;
- DEFINE_WAIT(wait);
int ret;
- /* See comment at the top of this file */
- smp_rmb();
if (io_cqring_events(ring) >= min_events)
return 0;
@@ -2216,23 +2200,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
- do {
- prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
-
- ret = 0;
- /* See comment at the top of this file */
- smp_rmb();
- if (io_cqring_events(ring) >= min_events)
- break;
-
- schedule();
-
+ ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
+ if (ret == -ERESTARTSYS)
ret = -EINTR;
- if (signal_pending(current))
- break;
- } while (1);
-
- finish_wait(&ctx->wait, &wait);
if (sig)
restore_user_sigmask(sig, &sigsaved);
@@ -2273,8 +2243,11 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{
if (ctx->sqo_thread) {
- ctx->sqo_stop = 1;
- mb();
+ /*
+ * The park is a bit of a work-around, without it we get
+ * warning spews on shutdown with SQPOLL set and affinity
+ * set to a single CPU.
+ */
kthread_park(ctx->sqo_thread);
kthread_stop(ctx->sqo_thread);
ctx->sqo_thread = NULL;
@@ -2467,10 +2440,11 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
ctx->sq_thread_idle = HZ;
if (p->flags & IORING_SETUP_SQ_AFF) {
- int cpu = array_index_nospec(p->sq_thread_cpu,
- nr_cpu_ids);
+ int cpu = p->sq_thread_cpu;
ret = -EINVAL;
+ if (cpu >= nr_cpu_ids)
+ goto err;
if (!cpu_online(cpu))
goto err;
@@ -2697,8 +2671,9 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0;
down_read(&current->mm->mmap_sem);
- pret = get_user_pages_longterm(ubuf, nr_pages, FOLL_WRITE,
- pages, vmas);
+ pret = get_user_pages(ubuf, nr_pages,
+ FOLL_WRITE | FOLL_LONGTERM,
+ pages, vmas);
if (pret == nr_pages) {
/* don't support file backed memory */
for (j = 0; j < nr_pages; j++) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 70f520b41a19..5fb4f8910aab 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -56,7 +56,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4;
int status;
- status = lockd_up(nlm_init->net);
+ status = lockd_up(nlm_init->net, nlm_init->cred);
if (status < 0)
return ERR_PTR(status);
@@ -241,7 +241,7 @@ reclaimer(void *ptr)
allow_signal(SIGKILL);
down_write(&host->h_rwsem);
- lockd_up(net); /* note: this cannot fail as lockd is already running */
+ lockd_up(net, NULL); /* note: this cannot fail as lockd is already running */
dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 346ed161756d..3056f3a0c270 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -188,28 +188,31 @@ lockd(void *vrqstp)
static int create_lockd_listener(struct svc_serv *serv, const char *name,
struct net *net, const int family,
- const unsigned short port)
+ const unsigned short port,
+ const struct cred *cred)
{
struct svc_xprt *xprt;
xprt = svc_find_xprt(serv, name, net, family, 0);
if (xprt == NULL)
return svc_create_xprt(serv, name, net, family, port,
- SVC_SOCK_DEFAULTS);
+ SVC_SOCK_DEFAULTS, cred);
svc_xprt_put(xprt);
return 0;
}
static int create_lockd_family(struct svc_serv *serv, struct net *net,
- const int family)
+ const int family, const struct cred *cred)
{
int err;
- err = create_lockd_listener(serv, "udp", net, family, nlm_udpport);
+ err = create_lockd_listener(serv, "udp", net, family, nlm_udpport,
+ cred);
if (err < 0)
return err;
- return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport);
+ return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport,
+ cred);
}
/*
@@ -222,16 +225,17 @@ static int create_lockd_family(struct svc_serv *serv, struct net *net,
* Returns zero if all listeners are available; otherwise a
* negative errno value is returned.
*/
-static int make_socks(struct svc_serv *serv, struct net *net)
+static int make_socks(struct svc_serv *serv, struct net *net,
+ const struct cred *cred)
{
static int warned;
int err;
- err = create_lockd_family(serv, net, PF_INET);
+ err = create_lockd_family(serv, net, PF_INET, cred);
if (err < 0)
goto out_err;
- err = create_lockd_family(serv, net, PF_INET6);
+ err = create_lockd_family(serv, net, PF_INET6, cred);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_err;
@@ -246,7 +250,8 @@ out_err:
return err;
}
-static int lockd_up_net(struct svc_serv *serv, struct net *net)
+static int lockd_up_net(struct svc_serv *serv, struct net *net,
+ const struct cred *cred)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
int error;
@@ -258,7 +263,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
if (error)
goto err_bind;
- error = make_socks(serv, net);
+ error = make_socks(serv, net, cred);
if (error < 0)
goto err_bind;
set_grace_period(net);
@@ -461,7 +466,7 @@ static struct svc_serv *lockd_create_svc(void)
/*
* Bring up the lockd process if it's not already up.
*/
-int lockd_up(struct net *net)
+int lockd_up(struct net *net, const struct cred *cred)
{
struct svc_serv *serv;
int error;
@@ -474,7 +479,7 @@ int lockd_up(struct net *net)
goto err_create;
}
- error = lockd_up_net(serv, net);
+ error = lockd_up_net(serv, net, cred);
if (error < 0) {
lockd_unregister_notifiers();
goto err_put;
@@ -807,5 +812,7 @@ static struct svc_program nlmsvc_program = {
.pg_name = "lockd", /* service name */
.pg_class = "nfsd", /* share authentication with nfsd */
.pg_stats = &nlmsvc_stats, /* stats table */
- .pg_authenticate = &lockd_authenticate /* export authentication */
+ .pg_authenticate = &lockd_authenticate, /* export authentication */
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
};
diff --git a/fs/locks.c b/fs/locks.c
index d7c05dde4ed8..8af49f89ac2f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -352,6 +352,12 @@ EXPORT_SYMBOL_GPL(locks_alloc_lock);
void locks_release_private(struct file_lock *fl)
{
+ BUG_ON(waitqueue_active(&fl->fl_wait));
+ BUG_ON(!list_empty(&fl->fl_list));
+ BUG_ON(!list_empty(&fl->fl_blocked_requests));
+ BUG_ON(!list_empty(&fl->fl_blocked_member));
+ BUG_ON(!hlist_unhashed(&fl->fl_link));
+
if (fl->fl_ops) {
if (fl->fl_ops->fl_release_private)
fl->fl_ops->fl_release_private(fl);
@@ -371,12 +377,6 @@ EXPORT_SYMBOL_GPL(locks_release_private);
/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
- BUG_ON(waitqueue_active(&fl->fl_wait));
- BUG_ON(!list_empty(&fl->fl_list));
- BUG_ON(!list_empty(&fl->fl_blocked_requests));
- BUG_ON(!list_empty(&fl->fl_blocked_member));
- BUG_ON(!hlist_unhashed(&fl->fl_link));
-
locks_release_private(fl);
kmem_cache_free(filelock_cache, fl);
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 0b602a39dd71..7817ad94a6ba 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -41,11 +41,13 @@ static struct svc_program nfs4_callback_program;
static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
{
+ const struct cred *cred = current_cred();
int ret;
struct nfs_net *nn = net_generic(net, nfs_net_id);
ret = svc_create_xprt(serv, "tcp", net, PF_INET,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
@@ -53,7 +55,8 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
@@ -457,4 +460,6 @@ static struct svc_program nfs4_callback_program = {
.pg_class = "nfs", /* authentication class */
.pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
};
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 06233bfa6d73..73a5a5ea2976 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -983,7 +983,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
out_invalidcred:
pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
- return rpc_autherr_badcred;
+ return svc_return_autherr(rqstp, rpc_autherr_badcred);
}
/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index da74c4c4a244..3d04cb0b839e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -558,6 +558,7 @@ static int nfs_start_lockd(struct nfs_server *server)
1 : 0,
.net = clp->cl_net,
.nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
+ .cred = current_cred(),
};
if (nlm_init.nfs_version > 3)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index a7d3df85736d..e6a700f01452 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -22,7 +22,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
char *ip_addr = NULL;
int ip_len;
- ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL);
+ ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL, false);
if (ip_len > 0)
ret = rpc_pton(net, ip_addr, ip_len, sa, salen);
else
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 802993d8912f..baa01956a5b3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -570,13 +570,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
+ exp.ex_anon_uid= make_kuid(current_user_ns(), an_int);
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
+ exp.ex_anon_gid= make_kgid(current_user_ns(), an_int);
/* fsid */
err = get_int(&mesg, &an_int);
@@ -1170,15 +1170,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
static void exp_flags(struct seq_file *m, int flag, int fsid,
kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
{
+ struct user_namespace *userns = m->file->f_cred->user_ns;
+
show_expflags(m, flag, NFSEXP_ALLFLAGS);
if (flag & NFSEXP_FSID)
seq_printf(m, ",fsid=%d", fsid);
- if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
- !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
- if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
- !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
+ if (!uid_eq(anonu, make_kuid(userns, (uid_t)-2)) &&
+ !uid_eq(anonu, make_kuid(userns, 0x10000-2)))
+ seq_printf(m, ",anonuid=%u", from_kuid_munged(userns, anonu));
+ if (!gid_eq(anong, make_kgid(userns, (gid_t)-2)) &&
+ !gid_eq(anong, make_kgid(userns, 0x10000-2)))
+ seq_printf(m, ",anongid=%u", from_kgid_munged(userns, anong));
if (fsloc && fsloc->locations_count > 0) {
char *loctype = (fsloc->migrated) ? "refer" : "replicas";
int i;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 32cb8c027483..789abc4dd1d2 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -104,6 +104,9 @@ struct nfsd_net {
time_t nfsd4_grace;
bool somebody_reclaimed;
+ bool track_reclaim_completes;
+ atomic_t nr_reclaim_complete;
+
bool nfsd_net_up;
bool lockd_up;
@@ -131,10 +134,18 @@ struct nfsd_net {
u32 s2s_cp_cl_id;
struct idr s2s_cp_stateids;
spinlock_t s2s_cp_lock;
+
+ /*
+ * Version information
+ */
+ bool *nfsd_versions;
+ bool *nfsd4_minorversions;
};
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+extern void nfsd_netns_free_versions(struct nfsd_net *nn);
+
extern unsigned int nfsd_net_id;
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 8d789124ed3c..fcf31822c74c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -96,7 +96,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp)
}
static __be32 *
-decode_sattr3(__be32 *p, struct iattr *iap)
+decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns)
{
u32 tmp;
@@ -107,12 +107,12 @@ decode_sattr3(__be32 *p, struct iattr *iap)
iap->ia_mode = ntohl(*p++);
}
if (*p++) {
- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
+ iap->ia_uid = make_kuid(userns, ntohl(*p++));
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
if (*p++) {
- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
+ iap->ia_gid = make_kgid(userns, ntohl(*p++));
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
@@ -165,12 +165,13 @@ static __be32 *
encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
struct timespec ts;
*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
*p++ = htonl((u32) (stat->mode & S_IALLUGO));
*p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
+ *p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+ *p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
} else {
@@ -325,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if ((args->check_guard = ntohl(*p++)) != 0) {
struct timespec time;
@@ -455,7 +456,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
switch (args->createmode = ntohl(*p++)) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
break;
case NFS3_CREATE_EXCLUSIVE:
args->verf = p;
@@ -476,7 +477,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
if (!(p = decode_fh(p, &args->fh)) ||
!(p = decode_filename(p, &args->name, &args->len)))
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -491,7 +492,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
if (!(p = decode_fh(p, &args->ffh)) ||
!(p = decode_filename(p, &args->fname, &args->flen)))
return 0;
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
args->tlen = ntohl(*p++);
@@ -519,7 +520,7 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
if (args->ftype == NF3BLK || args->ftype == NF3CHR
|| args->ftype == NF3SOCK || args->ftype == NF3FIFO)
- p = decode_sattr3(p, &args->attrs);
+ p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
args->major = ntohl(*p++);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 9b93e7a9a26d..397eb7820929 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1123,10 +1123,11 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
return;
case 1:
- break;
- case -1:
- /* Network partition? */
- nfsd4_mark_cb_down(clp, task->tk_status);
+ switch (task->tk_status) {
+ case -EIO:
+ case -ETIMEDOUT:
+ nfsd4_mark_cb_down(clp, task->tk_status);
+ }
break;
default:
BUG();
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index bf137fec33ff..2961016097ac 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -634,7 +634,7 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
- *uid = make_kuid(&init_user_ns, id);
+ *uid = make_kuid(nfsd_user_namespace(rqstp), id);
if (!uid_valid(*uid))
status = nfserr_badowner;
return status;
@@ -651,7 +651,7 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
- *gid = make_kgid(&init_user_ns, id);
+ *gid = make_kgid(nfsd_user_namespace(rqstp), id);
if (!gid_valid(*gid))
status = nfserr_badowner;
return status;
@@ -660,13 +660,13 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kuid_t uid)
{
- u32 id = from_kuid(&init_user_ns, uid);
+ u32 id = from_kuid_munged(nfsd_user_namespace(rqstp), uid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
}
__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kgid_t gid)
{
- u32 id = from_kgid(&init_user_ns, gid);
+ u32 id = from_kgid_munged(nfsd_user_namespace(rqstp), gid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 44517fb5c0de..a79e24b79095 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -693,7 +693,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
ops->fence_client(ls);
else
nfsd4_cb_layout_fail(ls);
- return -1;
+ return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4680ad3bf55b..8beda999e134 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1927,6 +1927,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 status;
svcxdr_init_encode(rqstp, resp);
@@ -1949,7 +1950,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
* According to RFC3010, this takes precedence over all other errors.
*/
status = nfserr_minor_vers_mismatch;
- if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
+ if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
goto out;
status = nfserr_resource;
if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 8c8563441208..87679557d0d6 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -169,12 +169,33 @@ legacy_recdir_name_error(struct nfs4_client *clp, int error)
}
static void
+__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
+ const char *dname, int len, struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfs4_client_to_reclaim(name, nn);
+ if (!crp) {
+ kfree(name.data);
+ return;
+ }
+ crp->cr_clp = clp;
+}
+
+static void
nfsd4_create_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
char dname[HEXDIR_LEN];
struct dentry *dir, *dentry;
- struct nfs4_client_reclaim *crp;
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -220,11 +241,9 @@ out_put:
out_unlock:
inode_unlock(d_inode(dir));
if (status == 0) {
- if (nn->in_grace) {
- crp = nfs4_client_to_reclaim(dname, nn);
- if (crp)
- crp->cr_clp = clp;
- }
+ if (nn->in_grace)
+ __nfsd4_create_reclaim_record_grace(clp, dname,
+ HEXDIR_LEN, nn);
vfs_fsync(nn->rec_file, 0);
} else {
printk(KERN_ERR "NFSD: failed to write recovery record"
@@ -345,10 +364,29 @@ out_unlock:
}
static void
+__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
+ struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ nfs4_remove_reclaim_record(crp, nn);
+}
+
+static void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
- struct nfs4_client_reclaim *crp;
char dname[HEXDIR_LEN];
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -373,12 +411,9 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
- if (nn->in_grace) {
- /* remove reclaim record */
- crp = nfsd4_find_reclaim_client(dname, nn);
- if (crp)
- nfs4_remove_reclaim_record(crp, nn);
- }
+ if (nn->in_grace)
+ __nfsd4_remove_reclaim_record_grace(dname,
+ HEXDIR_LEN, nn);
}
out_drop_write:
mnt_drop_write_file(nn->rec_file);
@@ -392,14 +427,31 @@ static int
purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
int status;
+ struct xdr_netobj name;
- if (nfs4_has_reclaimed_state(child->d_name.name, nn))
+ if (child->d_name.len != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
+ /* Keep trying; maybe the others are OK: */
return 0;
+ }
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (nfs4_has_reclaimed_state(name, nn))
+ goto out_free;
status = vfs_rmdir(d_inode(parent), child);
if (status)
printk("failed to remove client recovery directory %pd\n",
child);
+out_free:
+ kfree(name.data);
+out:
/* Keep trying, success or failure: */
return 0;
}
@@ -429,13 +481,24 @@ out:
static int
load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
{
+ struct xdr_netobj name;
+
if (child->d_name.len != HEXDIR_LEN - 1) {
- printk("nfsd4: illegal name %pd in recovery directory\n",
- child);
+ printk("%s: illegal name %pd in recovery directory\n",
+ __func__, child);
/* Keep trying; maybe the others are OK: */
return 0;
}
- nfs4_client_to_reclaim(child->d_name.name, nn);
+ name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (!nfs4_client_to_reclaim(name, nn))
+ kfree(name.data);
+out:
return 0;
}
@@ -564,6 +627,7 @@ nfsd4_legacy_tracking_init(struct net *net)
status = nfsd4_load_reboot_recovery_data(net);
if (status)
goto err;
+ printk("NFSD: Using legacy client tracking operations.\n");
return 0;
err:
@@ -615,6 +679,7 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
char dname[HEXDIR_LEN];
struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct xdr_netobj name;
/* did we already find that this client is stable? */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
@@ -627,13 +692,22 @@ nfsd4_check_legacy_client(struct nfs4_client *clp)
}
/* look for it in the reclaim hashtable otherwise */
- crp = nfsd4_find_reclaim_client(dname, nn);
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out_enoent;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
if (crp) {
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
crp->cr_clp = clp;
return 0;
}
+out_enoent:
return -ENOENT;
}
@@ -656,6 +730,7 @@ struct cld_net {
spinlock_t cn_lock;
struct list_head cn_list;
unsigned int cn_xid;
+ bool cn_has_legacy;
};
struct cld_upcall {
@@ -706,6 +781,40 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
}
static ssize_t
+__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+ struct nfsd_net *nn)
+{
+ uint8_t cmd;
+ struct xdr_netobj name;
+ uint16_t namelen;
+ struct cld_net *cn = nn->cld_net;
+
+ if (get_user(cmd, &cmsg->cm_cmd)) {
+ dprintk("%s: error when copying cmd from userspace", __func__);
+ return -EFAULT;
+ }
+ if (cmd == Cld_GraceStart) {
+ if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
+ if (IS_ERR_OR_NULL(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
+ name.len = name.len - 5;
+ memmove(name.data, name.data + 5, name.len);
+ cn->cn_has_legacy = true;
+ }
+ if (!nfs4_client_to_reclaim(name, nn)) {
+ kfree(name.data);
+ return -EFAULT;
+ }
+ return sizeof(*cmsg);
+ }
+ return -EFAULT;
+}
+
+static ssize_t
cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct cld_upcall *tmp, *cup;
@@ -714,6 +823,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
nfsd_net_id);
struct cld_net *cn = nn->cld_net;
+ int16_t status;
if (mlen != sizeof(*cmsg)) {
dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
@@ -727,13 +837,24 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
return -EFAULT;
}
+ /*
+ * copy the status so we know whether to remove the upcall from the
+ * list (for -EINPROGRESS, we just want to make sure the xid is
+ * valid, not remove the upcall from the list)
+ */
+ if (get_user(status, &cmsg->cm_status)) {
+ dprintk("%s: error when copying status from userspace", __func__);
+ return -EFAULT;
+ }
+
/* walk the list and find corresponding xid */
cup = NULL;
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
cup = tmp;
- list_del_init(&cup->cu_list);
+ if (status != -EINPROGRESS)
+ list_del_init(&cup->cu_list);
break;
}
}
@@ -745,6 +866,9 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
return -EINVAL;
}
+ if (status == -EINPROGRESS)
+ return __cld_pipe_inprogress_downcall(cmsg, nn);
+
if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
return -EFAULT;
@@ -820,7 +944,7 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
static int
-nfsd4_init_cld_pipe(struct net *net)
+__nfsd4_init_cld_pipe(struct net *net)
{
int ret;
struct dentry *dentry;
@@ -851,6 +975,7 @@ nfsd4_init_cld_pipe(struct net *net)
}
cn->cn_pipe->dentry = dentry;
+ cn->cn_has_legacy = false;
nn->cld_net = cn;
return 0;
@@ -863,6 +988,17 @@ err:
return ret;
}
+static int
+nfsd4_init_cld_pipe(struct net *net)
+{
+ int status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (!status)
+ printk("NFSD: Using old nfsdcld client tracking operations.\n");
+ return status;
+}
+
static void
nfsd4_remove_cld_pipe(struct net *net)
{
@@ -991,9 +1127,14 @@ out_err:
"record from stable storage: %d\n", ret);
}
-/* Check for presence of a record, and update its timestamp */
+/*
+ * For older nfsdcld's that do not allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record, and update its timestamp
+ */
static int
-nfsd4_cld_check(struct nfs4_client *clp)
+nfsd4_cld_check_v0(struct nfs4_client *clp)
{
int ret;
struct cld_upcall *cup;
@@ -1026,8 +1167,84 @@ nfsd4_cld_check(struct nfs4_client *clp)
return ret;
}
+/*
+ * For newer nfsdcld's that allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record in the reclaim_str_hashtbl
+ */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ int status;
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+ if (cn->cn_has_legacy) {
+ status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+ if (status)
+ return -ENOENT;
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+ return -ENOENT;
+found:
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_grace_start(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_GraceStart;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret)
+ ret = cup->cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get clients from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+/* For older nfsdcld's that need cm_gracetime */
static void
-nfsd4_cld_grace_done(struct nfsd_net *nn)
+nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
{
int ret;
struct cld_upcall *cup;
@@ -1051,11 +1268,149 @@ out_err:
printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
}
-static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+/*
+ * For newer nfsdcld's that do not need cm_gracetime. We also need to call
+ * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
+ */
+static void
+nfsd4_cld_grace_done(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(cn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_msg.cm_cmd = Cld_GraceDone;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ if (!ret)
+ ret = cup->cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ nfs4_release_reclaim(nn);
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+static int
+nfs4_cld_state_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!nn->reclaim_str_hashtbl)
+ return -ENOMEM;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+ nn->reclaim_str_hashtbl_size = 0;
+ nn->track_reclaim_completes = true;
+ atomic_set(&nn->nr_reclaim_complete, 0);
+
+ return 0;
+}
+
+static void
+nfs4_cld_state_shutdown(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->track_reclaim_completes = false;
+ kfree(nn->reclaim_str_hashtbl);
+}
+
+static bool
+cld_running(struct nfsd_net *nn)
+{
+ struct cld_net *cn = nn->cld_net;
+ struct rpc_pipe *pipe = cn->cn_pipe;
+
+ return pipe->nreaders || pipe->nwriters;
+}
+
+static int
+nfsd4_cld_tracking_init(struct net *net)
+{
+ int status;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool running;
+ int retries = 10;
+
+ status = nfs4_cld_state_init(net);
+ if (status)
+ return status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (status)
+ goto err_shutdown;
+
+ /*
+ * rpc pipe upcalls take 30 seconds to time out, so we don't want to
+ * queue an upcall unless we know that nfsdcld is running (because we
+ * want this to fail fast so that nfsd4_client_tracking_init() can try
+ * the next client tracking method). nfsdcld should already be running
+ * before nfsd is started, so the wait here is for nfsdcld to open the
+ * pipefs file we just created.
+ */
+ while (!(running = cld_running(nn)) && retries--)
+ msleep(100);
+
+ if (!running) {
+ status = -ETIMEDOUT;
+ goto err_remove;
+ }
+
+ status = nfsd4_cld_grace_start(nn);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+ nfs4_release_reclaim(nn);
+ goto err_remove;
+ } else
+ printk("NFSD: Using nfsdcld client tracking operations.\n");
+ return 0;
+
+err_remove:
+ nfsd4_remove_cld_pipe(net);
+err_shutdown:
+ nfs4_cld_state_shutdown(net);
+ return status;
+}
+
+static void
+nfsd4_cld_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs4_release_reclaim(nn);
+ nfsd4_remove_cld_pipe(net);
+ nfs4_cld_state_shutdown(net);
+}
+
+/* For older nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
.init = nfsd4_init_cld_pipe,
.exit = nfsd4_remove_cld_pipe,
.create = nfsd4_cld_create,
.remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v0,
+ .grace_done = nfsd4_cld_grace_done_v0,
+};
+
+/* For newer nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
.check = nfsd4_cld_check,
.grace_done = nfsd4_cld_grace_done,
};
@@ -1267,6 +1622,8 @@ nfsd4_umh_cltrack_init(struct net *net)
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
+ if (!ret)
+ printk("NFSD: Using UMH upcall client tracking operations.\n");
return ret;
}
@@ -1416,9 +1773,20 @@ nfsd4_client_tracking_init(struct net *net)
if (nn->client_tracking_ops)
goto do_init;
+ /* First, try to use nfsdcld */
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ if (status != -ETIMEDOUT) {
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ }
+
/*
- * First, try a UMH upcall. It should succeed or fail quickly, so
- * there's little harm in trying that first.
+ * Next, try the UMH upcall.
*/
nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
status = nn->client_tracking_ops->init(net);
@@ -1426,25 +1794,23 @@ nfsd4_client_tracking_init(struct net *net)
return status;
/*
- * See if the recoverydir exists and is a directory. If it is,
- * then use the legacy ops.
+ * Finally, See if the recoverydir exists and is a directory.
+ * If it is, then use the legacy ops.
*/
nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
if (!status) {
status = d_is_dir(path.dentry);
path_put(&path);
- if (status)
- goto do_init;
+ if (!status) {
+ status = -EINVAL;
+ goto out;
+ }
}
- /* Finally, try to use nfsdcld */
- nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
- printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
- "removed in 3.10. Please transition to using "
- "nfsdcltrack.\n");
do_init:
status = nn->client_tracking_ops->init(net);
+out:
if (status) {
printk(KERN_WARNING "NFSD: Unable to initialize client "
"recovery tracking! (%d)\n", status);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eca4a23f93c8..618e66078ee5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -77,6 +77,7 @@ static u64 current_sessionid = 1;
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
+void nfsd4_end_grace(struct nfsd_net *nn);
/* Locking: */
@@ -1067,9 +1068,9 @@ static unsigned int clientid_hashval(u32 id)
return id & CLIENT_HASH_MASK;
}
-static unsigned int clientstr_hashval(const char *name)
+static unsigned int clientstr_hashval(struct xdr_netobj name)
{
- return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
+ return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
}
/*
@@ -1997,6 +1998,22 @@ destroy_client(struct nfs4_client *clp)
__destroy_client(clp);
}
+static void inc_reclaim_complete(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (!nn->track_reclaim_completes)
+ return;
+ if (!nfsd4_find_reclaim_client(clp->cl_name, nn))
+ return;
+ if (atomic_inc_return(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size) {
+ printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
+ clp->net->ns.inum);
+ nfsd4_end_grace(nn);
+ }
+}
+
static void expire_client(struct nfs4_client *clp)
{
unhash_client(clp);
@@ -2048,11 +2065,6 @@ compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
return memcmp(o1->data, o2->data, o1->len);
}
-static int same_name(const char *n1, const char *n2)
-{
- return 0 == memcmp(n1, n2, HEXDIR_LEN);
-}
-
static int
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
@@ -3354,6 +3366,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
status = nfs_ok;
nfsd4_client_record_create(cstate->session->se_client);
+ inc_reclaim_complete(cstate->session->se_client);
out:
return status;
}
@@ -3958,6 +3971,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
switch (task->tk_status) {
case 0:
return 1;
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/*
@@ -3970,7 +3986,7 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
}
/*FALLTHRU*/
default:
- return -1;
+ return 1;
}
}
@@ -4713,7 +4729,6 @@ nfsd4_end_grace(struct nfsd_net *nn)
if (nn->grace_ended)
return;
- dprintk("NFSD: end of grace period\n");
nn->grace_ended = true;
/*
* If the server goes down again right now, an NFSv4
@@ -4749,6 +4764,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
unsigned long double_grace_period_end = nn->boot_time +
2 * nn->nfsd4_lease;
+ if (nn->track_reclaim_completes &&
+ atomic_read(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size)
+ return false;
if (!nn->somebody_reclaimed)
return false;
nn->somebody_reclaimed = false;
@@ -4779,6 +4798,7 @@ nfs4_laundromat(struct nfsd_net *nn)
new_timeo = 0;
goto out;
}
+ dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
spin_lock(&nn->client_lock);
@@ -6458,7 +6478,7 @@ alloc_reclaim(void)
}
bool
-nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
+nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
{
struct nfs4_client_reclaim *crp;
@@ -6468,20 +6488,24 @@ nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
/*
* failure => all reset bets are off, nfserr_no_grace...
+ *
+ * The caller is responsible for freeing name.data if NULL is returned (it
+ * will be freed in nfs4_remove_reclaim_record in the normal case).
*/
struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp;
- dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+ dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data);
crp = alloc_reclaim();
if (crp) {
strhashval = clientstr_hashval(name);
INIT_LIST_HEAD(&crp->cr_strhash);
list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
- memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+ crp->cr_name.data = name.data;
+ crp->cr_name.len = name.len;
crp->cr_clp = NULL;
nn->reclaim_str_hashtbl_size++;
}
@@ -6492,6 +6516,7 @@ void
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
list_del(&crp->cr_strhash);
+ kfree(crp->cr_name.data);
kfree(crp);
nn->reclaim_str_hashtbl_size--;
}
@@ -6515,16 +6540,16 @@ nfs4_release_reclaim(struct nfsd_net *nn)
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
+nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;
- dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
+ dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data);
- strhashval = clientstr_hashval(recdir);
+ strhashval = clientstr_hashval(name);
list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
- if (same_name(crp->cr_recdir, recdir)) {
+ if (compare_blob(&crp->cr_name, &name) == 0) {
return crp;
}
}
@@ -7262,10 +7287,19 @@ nfs4_state_start_net(struct net *net)
return ret;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
+ if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
+ goto skip_grace;
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
+
+skip_grace:
+ printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n",
+ net->ns.inum);
+ queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ);
+ nfsd4_end_grace(nn);
+ return 0;
}
/* initialization to perform when the nfsd service is started: */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3de42a729093..52c4f6daa649 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -521,6 +521,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
{
DECODE_HEAD;
+ struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
u32 dummy, uid, gid;
char *machine_name;
int i;
@@ -563,8 +564,8 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
if (cbs->flavor == (u32)(-1)) {
- kuid_t kuid = make_kuid(&init_user_ns, uid);
- kgid_t kgid = make_kgid(&init_user_ns, gid);
+ kuid_t kuid = make_kuid(userns, uid);
+ kgid_t kgid = make_kgid(userns, gid);
if (uid_valid(kuid) && gid_valid(kgid)) {
cbs->uid = kuid;
cbs->gid = kgid;
@@ -2420,8 +2421,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
__be32 status;
int err;
struct nfs4_acl *acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
void *context = NULL;
int contextlen;
+#endif
bool contextsupport = false;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -2906,12 +2909,14 @@ out_acl:
*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
}
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
contextlen);
if (status)
goto out;
}
+#endif
attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f2feb2d11bae..90972e1fd785 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -439,7 +439,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return rv;
if (newthreads < 0)
return -EINVAL;
- rv = nfsd_svc(newthreads, net);
+ rv = nfsd_svc(newthreads, net, file->f_cred);
if (rv < 0)
return rv;
} else
@@ -537,14 +537,14 @@ out_free:
}
static ssize_t
-nfsd_print_version_support(char *buf, int remaining, const char *sep,
- unsigned vers, int minor)
+nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining,
+ const char *sep, unsigned vers, int minor)
{
const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u";
- bool supported = !!nfsd_vers(vers, NFSD_TEST);
+ bool supported = !!nfsd_vers(nn, vers, NFSD_TEST);
if (vers == 4 && minor >= 0 &&
- !nfsd_minorversion(minor, NFSD_TEST))
+ !nfsd_minorversion(nn, minor, NFSD_TEST))
supported = false;
if (minor == 0 && supported)
/*
@@ -599,20 +599,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
switch(num) {
case 2:
case 3:
- nfsd_vers(num, cmd);
+ nfsd_vers(nn, num, cmd);
break;
case 4:
if (*minorp == '.') {
- if (nfsd_minorversion(minor, cmd) < 0)
+ if (nfsd_minorversion(nn, minor, cmd) < 0)
return -EINVAL;
- } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) {
+ } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) {
/*
* Either we have +4 and no minors are enabled,
* or we have -4 and at least one minor is enabled.
* In either case, propagate 'cmd' to all minors.
*/
minor = 0;
- while (nfsd_minorversion(minor, cmd) >= 0)
+ while (nfsd_minorversion(nn, minor, cmd) >= 0)
minor++;
}
break;
@@ -624,7 +624,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
/* If all get turned off, turn them back on, as
* having no versions is BAD
*/
- nfsd_reset_versions();
+ nfsd_reset_versions(nn);
}
/* Now write current state into reply buffer */
@@ -633,12 +633,12 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
remaining = SIMPLE_TRANSACTION_LIMIT;
for (num=2 ; num <= 4 ; num++) {
int minor;
- if (!nfsd_vers(num, NFSD_AVAIL))
+ if (!nfsd_vers(nn, num, NFSD_AVAIL))
continue;
minor = -1;
do {
- len = nfsd_print_version_support(buf, remaining,
+ len = nfsd_print_version_support(nn, buf, remaining,
sep, num, minor);
if (len >= remaining)
goto out;
@@ -717,7 +717,7 @@ static ssize_t __write_ports_names(char *buf, struct net *net)
* a socket of a supported family/protocol, and we use it as an
* nfsd listener.
*/
-static ssize_t __write_ports_addfd(char *buf, struct net *net)
+static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred)
{
char *mesg = buf;
int fd, err;
@@ -736,7 +736,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
if (err != 0)
return err;
- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
+ err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
if (err < 0) {
nfsd_destroy(net);
return err;
@@ -751,7 +751,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
* A transport listener is added by writing it's transport name and
* a port number.
*/
-static ssize_t __write_ports_addxprt(char *buf, struct net *net)
+static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
{
char transport[16];
struct svc_xprt *xprt;
@@ -769,12 +769,12 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net)
return err;
err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET, port, SVC_SOCK_ANONYMOUS);
+ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0)
goto out_err;
err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET6, port, SVC_SOCK_ANONYMOUS);
+ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
@@ -799,10 +799,10 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
return __write_ports_names(buf, net);
if (isdigit(buf[0]))
- return __write_ports_addfd(buf, net);
+ return __write_ports_addfd(buf, net, file->f_cred);
if (isalpha(buf[0]))
- return __write_ports_addxprt(buf, net);
+ return __write_ports_addxprt(buf, net, file->f_cred);
return -EINVAL;
}
@@ -1239,9 +1239,12 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
nn->somebody_reclaimed = false;
+ nn->track_reclaim_completes = false;
nn->clverifier_counter = prandom_u32();
nn->clientid_counter = prandom_u32();
nn->s2s_cp_cl_id = nn->clientid_counter++;
@@ -1260,6 +1263,7 @@ static __net_exit void nfsd_exit_net(struct net *net)
{
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
}
static struct pernet_operations nfsd_net_ops = {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 066899929863..24187b5dd638 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -17,6 +17,7 @@
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/msg_prot.h>
#include <uapi/linux/nfsd/debug.h>
@@ -73,7 +74,7 @@ extern const struct seq_operations nfs_exports_op;
/*
* Function prototypes.
*/
-int nfsd_svc(int nrservs, struct net *net);
+int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
int nfsd_nrthreads(struct net *);
@@ -98,10 +99,12 @@ extern const struct svc_version nfsd_acl_version3;
#endif
#endif
+struct nfsd_net;
+
enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-int nfsd_vers(int vers, enum vers_op change);
-int nfsd_minorversion(u32 minorversion, enum vers_op change);
-void nfsd_reset_versions(void);
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
extern int nfsd_max_blksize;
@@ -110,6 +113,12 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
{
return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
}
+static inline struct user_namespace *
+nfsd_user_namespace(const struct svc_rqst *rqstp)
+{
+ const struct cred *cred = rqstp->rq_xprt->xpt_cred;
+ return cred ? cred->user_ns : &init_user_ns;
+}
/*
* NFSv4 State
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 89cb484f1cfb..18d94ea984ba 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -32,6 +32,24 @@
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static int nfsd_acl_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_acl_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
+#endif
+static int nfsd_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
/*
* nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
@@ -86,6 +104,8 @@ static struct svc_program nfsd_acl_program = {
.pg_class = "nfsd",
.pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
};
static struct svc_stat nfsd_acl_svcstats = {
@@ -105,7 +125,6 @@ static const struct svc_version *nfsd_version[] = {
#define NFSD_MINVERS 2
#define NFSD_NRVERS ARRAY_SIZE(nfsd_version)
-static const struct svc_version *nfsd_versions[NFSD_NRVERS];
struct svc_program nfsd_program = {
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -113,77 +132,136 @@ struct svc_program nfsd_program = {
#endif
.pg_prog = NFS_PROGRAM, /* program number */
.pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
- .pg_vers = nfsd_versions, /* version table */
+ .pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
.pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */
-
+ .pg_init_request = nfsd_init_request,
+ .pg_rpcbind_set = nfsd_rpcbind_set,
};
-static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
- [0] = 1,
- [1] = 1,
- [2] = 1,
-};
+static bool
+nfsd_support_version(int vers)
+{
+ if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS)
+ return nfsd_version[vers] != NULL;
+ return false;
+}
+
+static bool *
+nfsd_alloc_versions(void)
+{
+ bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL);
+ unsigned i;
+
+ if (vers) {
+ /* All compiled versions are enabled by default */
+ for (i = 0; i < NFSD_NRVERS; i++)
+ vers[i] = nfsd_support_version(i);
+ }
+ return vers;
+}
+
+static bool *
+nfsd_alloc_minorversions(void)
+{
+ bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1,
+ sizeof(bool), GFP_KERNEL);
+ unsigned i;
+
+ if (vers) {
+ /* All minor versions are enabled by default */
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
+ vers[i] = nfsd_support_version(4);
+ }
+ return vers;
+}
-int nfsd_vers(int vers, enum vers_op change)
+void
+nfsd_netns_free_versions(struct nfsd_net *nn)
+{
+ kfree(nn->nfsd_versions);
+ kfree(nn->nfsd4_minorversions);
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+}
+
+static void
+nfsd_netns_init_versions(struct nfsd_net *nn)
+{
+ if (!nn->nfsd_versions) {
+ nn->nfsd_versions = nfsd_alloc_versions();
+ nn->nfsd4_minorversions = nfsd_alloc_minorversions();
+ if (!nn->nfsd_versions || !nn->nfsd4_minorversions)
+ nfsd_netns_free_versions(nn);
+ }
+}
+
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change)
{
if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
return 0;
switch(change) {
case NFSD_SET:
- nfsd_versions[vers] = nfsd_version[vers];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = nfsd_acl_version[vers];
-#endif
+ if (nn->nfsd_versions)
+ nn->nfsd_versions[vers] = nfsd_support_version(vers);
break;
case NFSD_CLEAR:
- nfsd_versions[vers] = NULL;
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = NULL;
-#endif
+ nfsd_netns_init_versions(nn);
+ if (nn->nfsd_versions)
+ nn->nfsd_versions[vers] = false;
break;
case NFSD_TEST:
- return nfsd_versions[vers] != NULL;
+ if (nn->nfsd_versions)
+ return nn->nfsd_versions[vers];
+ /* Fallthrough */
case NFSD_AVAIL:
- return nfsd_version[vers] != NULL;
+ return nfsd_support_version(vers);
}
return 0;
}
static void
-nfsd_adjust_nfsd_versions4(void)
+nfsd_adjust_nfsd_versions4(struct nfsd_net *nn)
{
unsigned i;
for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
- if (nfsd_supported_minorversions[i])
+ if (nn->nfsd4_minorversions[i])
return;
}
- nfsd_vers(4, NFSD_CLEAR);
+ nfsd_vers(nn, 4, NFSD_CLEAR);
}
-int nfsd_minorversion(u32 minorversion, enum vers_op change)
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
change != NFSD_AVAIL)
return -1;
+
switch(change) {
case NFSD_SET:
- nfsd_supported_minorversions[minorversion] = true;
- nfsd_vers(4, NFSD_SET);
+ if (nn->nfsd4_minorversions) {
+ nfsd_vers(nn, 4, NFSD_SET);
+ nn->nfsd4_minorversions[minorversion] =
+ nfsd_vers(nn, 4, NFSD_TEST);
+ }
break;
case NFSD_CLEAR:
- nfsd_supported_minorversions[minorversion] = false;
- nfsd_adjust_nfsd_versions4();
+ nfsd_netns_init_versions(nn);
+ if (nn->nfsd4_minorversions) {
+ nn->nfsd4_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4(nn);
+ }
break;
case NFSD_TEST:
- return nfsd_supported_minorversions[minorversion];
+ if (nn->nfsd4_minorversions)
+ return nn->nfsd4_minorversions[minorversion];
+ return nfsd_vers(nn, 4, NFSD_TEST);
case NFSD_AVAIL:
- return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
+ return minorversion <= NFSD_SUPPORTED_MINOR_VERSION &&
+ nfsd_vers(nn, 4, NFSD_AVAIL);
}
return 0;
}
@@ -205,7 +283,7 @@ int nfsd_nrthreads(struct net *net)
return rv;
}
-static int nfsd_init_socks(struct net *net)
+static int nfsd_init_socks(struct net *net, const struct cred *cred)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -214,12 +292,12 @@ static int nfsd_init_socks(struct net *net)
return 0;
error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
+ SVC_SOCK_DEFAULTS, cred);
if (error < 0)
return error;
@@ -265,16 +343,12 @@ static void nfsd_shutdown_generic(void)
nfsd_racache_shutdown();
}
-static bool nfsd_needs_lockd(void)
+static bool nfsd_needs_lockd(struct nfsd_net *nn)
{
-#if defined(CONFIG_NFSD_V3)
- return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL);
-#else
- return (nfsd_versions[2] != NULL);
-#endif
+ return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
}
-static int nfsd_startup_net(int nrservs, struct net *net)
+static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
@@ -285,12 +359,12 @@ static int nfsd_startup_net(int nrservs, struct net *net)
ret = nfsd_startup_generic(nrservs);
if (ret)
return ret;
- ret = nfsd_init_socks(net);
+ ret = nfsd_init_socks(net, cred);
if (ret)
goto out_socks;
- if (nfsd_needs_lockd() && !nn->lockd_up) {
- ret = lockd_up(net);
+ if (nfsd_needs_lockd(nn) && !nn->lockd_up) {
+ ret = lockd_up(net, cred);
if (ret)
goto out_socks;
nn->lockd_up = 1;
@@ -422,20 +496,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
nfsd_export_flush(net);
}
-void nfsd_reset_versions(void)
+void nfsd_reset_versions(struct nfsd_net *nn)
{
int i;
for (i = 0; i < NFSD_NRVERS; i++)
- if (nfsd_vers(i, NFSD_TEST))
+ if (nfsd_vers(nn, i, NFSD_TEST))
return;
for (i = 0; i < NFSD_NRVERS; i++)
if (i != 4)
- nfsd_vers(i, NFSD_SET);
+ nfsd_vers(nn, i, NFSD_SET);
else {
int minor = 0;
- while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+ while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0)
minor++;
}
}
@@ -503,7 +577,7 @@ int nfsd_create_serv(struct net *net)
}
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
- nfsd_reset_versions();
+ nfsd_reset_versions(nn);
nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
&nfsd_thread_sv_ops);
if (nn->nfsd_serv == NULL)
@@ -623,7 +697,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
* this is the first time nrservs is nonzero.
*/
int
-nfsd_svc(int nrservs, struct net *net)
+nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
{
int error;
bool nfsd_up_before;
@@ -645,7 +719,7 @@ nfsd_svc(int nrservs, struct net *net)
nfsd_up_before = nn->nfsd_net_up;
- error = nfsd_startup_net(nrservs, net);
+ error = nfsd_startup_net(nrservs, net, cred);
if (error)
goto out_destroy;
error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
@@ -667,6 +741,101 @@ out:
return error;
}
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static bool
+nfsd_support_acl_version(int vers)
+{
+ if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS)
+ return nfsd_acl_version[vers] != NULL;
+ return false;
+}
+
+static int
+nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_support_acl_version(version) ||
+ !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_acl_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_ACL_NRVERS;
+ for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers == NFSD_ACL_NRVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_ACL_MINVERS;
+ for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
+#endif
+
+static int
+nfsd_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_NRVERS;
+ for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers == NFSD_NRVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_MINVERS;
+ for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
/*
* This is the NFS server kernel thread
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6b2e8b73d36e..b51fe515f06f 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -71,7 +71,7 @@ decode_filename(__be32 *p, char **namp, unsigned int *lenp)
}
static __be32 *
-decode_sattr(__be32 *p, struct iattr *iap)
+decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns)
{
u32 tmp, tmp1;
@@ -86,12 +86,12 @@ decode_sattr(__be32 *p, struct iattr *iap)
iap->ia_mode = tmp;
}
if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_uid = make_kuid(&init_user_ns, tmp);
+ iap->ia_uid = make_kuid(userns, tmp);
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_gid = make_kgid(&init_user_ns, tmp);
+ iap->ia_gid = make_kgid(userns, tmp);
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
@@ -129,6 +129,7 @@ static __be32 *
encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
struct dentry *dentry = fhp->fh_dentry;
int type;
struct timespec64 time;
@@ -139,8 +140,8 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
*p++ = htonl(nfs_ftypes[type >> 12]);
*p++ = htonl((u32) stat->mode);
*p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
+ *p++ = htonl((u32) from_kuid_munged(userns, stat->uid));
+ *p++ = htonl((u32) from_kgid_munged(userns, stat->gid));
if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
*p++ = htonl(NFS_MAXPATHLEN);
@@ -216,7 +217,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
p = decode_fh(p, &args->fh);
if (!p)
return 0;
- p = decode_sattr(p, &args->attrs);
+ p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -319,7 +320,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
if ( !(p = decode_fh(p, &args->fh))
|| !(p = decode_filename(p, &args->name, &args->len)))
return 0;
- p = decode_sattr(p, &args->attrs);
+ p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return xdr_argsize_check(rqstp, p);
}
@@ -398,7 +399,7 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
return 0;
p += xdrlen;
}
- decode_sattr(p, &args->attrs);
+ decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp));
return 1;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9d6cb246c6c5..0b74d371ed67 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,7 +368,7 @@ struct nfs4_client {
struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */
- char cr_recdir[HEXDIR_LEN]; /* recover dir */
+ struct xdr_netobj cr_name; /* recovery dir name */
};
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -620,7 +620,7 @@ void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
@@ -635,9 +635,9 @@ extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
-extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct nfsd_net *nn);
-extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
+extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7dc98e14655d..fc24ee47eab5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1786,12 +1786,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
rdentry = lookup_one_len(fname, dentry, flen);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_nfserr;
+ goto out_drop_write;
if (d_really_is_negative(rdentry)) {
dput(rdentry);
- err = nfserr_noent;
- goto out;
+ host_err = -ENOENT;
+ goto out_drop_write;
}
if (!type)
@@ -1805,6 +1805,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
host_err = commit_metadata(fhp);
dput(rdentry);
+out_drop_write:
+ fh_drop_write(fhp);
out_nfserr:
err = nfserrno(host_err);
out:
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index a7e107309f76..db351247892d 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -120,8 +120,11 @@ void nfsd_put_raparams(struct file *file, struct raparms *ra);
static inline int fh_want_write(struct svc_fh *fh)
{
- int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+ int ret;
+ if (fh->fh_want_write)
+ return 0;
+ ret = mnt_want_write(fh->fh_export->ex_path.mnt);
if (!ret)
fh->fh_want_write = true;
return ret;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 5433e37fb0c5..8c7cbac7183c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -108,6 +108,47 @@ void fsnotify_sb_delete(struct super_block *sb)
}
/*
+ * fsnotify_nameremove - a filename was removed from a directory
+ *
+ * This is mostly called under parent vfs inode lock so name and
+ * dentry->d_parent should be stable. However there are some corner cases where
+ * inode lock is not held. So to be on the safe side and be resilient to future
+ * callers and out of tree users of d_delete(), we do not assume that d_parent
+ * and d_name are stable and we use dget_parent() and
+ * take_dentry_name_snapshot() to grab stable references.
+ */
+void fsnotify_nameremove(struct dentry *dentry, int isdir)
+{
+ struct dentry *parent;
+ struct name_snapshot name;
+ __u32 mask = FS_DELETE;
+
+ /* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
+ if (IS_ROOT(dentry))
+ return;
+
+ if (isdir)
+ mask |= FS_ISDIR;
+
+ parent = dget_parent(dentry);
+ /* Avoid unneeded take_dentry_name_snapshot() */
+ if (!(d_inode(parent)->i_fsnotify_mask & FS_DELETE) &&
+ !(dentry->d_sb->s_fsnotify_mask & FS_DELETE))
+ goto out_dput;
+
+ take_dentry_name_snapshot(&name, dentry);
+
+ fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
+ &name.name, 0);
+
+ release_dentry_name_snapshot(&name);
+
+out_dput:
+ dput(parent);
+}
+EXPORT_SYMBOL(fsnotify_nameremove);
+
+/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
* on a child we run all of our children and set a dentry flag saying that the
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 22acb0a79b53..b251105f646f 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -619,6 +619,11 @@ restart:
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
+ /*
+ * Since connector is attached to object using cmpxchg() we are
+ * guaranteed that connector initialization is fully visible by anyone
+ * seeing mark->connector set.
+ */
WRITE_ONCE(mark->connector, conn);
out_err:
spin_unlock(&conn->lock);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c121abbdfc7d..85f21caaa6ec 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -69,10 +69,6 @@
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-static unsigned char ocfs2_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
static int ocfs2_do_extend_dir(struct super_block *sb,
handle_t *handle,
struct inode *dir,
@@ -1718,7 +1714,7 @@ int __ocfs2_add_entry(handle_t *handle,
de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
de = de1;
}
- de->file_type = OCFS2_FT_UNKNOWN;
+ de->file_type = FT_UNKNOWN;
if (blkno) {
de->inode = cpu_to_le64(blkno);
ocfs2_set_de_type(de, inode->i_mode);
@@ -1803,13 +1799,9 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
offset += le16_to_cpu(de->rec_len);
if (le64_to_cpu(de->inode)) {
- unsigned char d_type = DT_UNKNOWN;
-
- if (de->file_type < OCFS2_FT_MAX)
- d_type = ocfs2_filetype_table[de->file_type];
-
if (!dir_emit(ctx, de->name, de->name_len,
- le64_to_cpu(de->inode), d_type))
+ le64_to_cpu(de->inode),
+ fs_ftype_to_dtype(de->file_type)))
goto out;
}
ctx->pos += le16_to_cpu(de->rec_len);
@@ -1900,14 +1892,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
break;
}
if (le64_to_cpu(de->inode)) {
- unsigned char d_type = DT_UNKNOWN;
-
- if (de->file_type < OCFS2_FT_MAX)
- d_type = ocfs2_filetype_table[de->file_type];
if (!dir_emit(ctx, de->name,
de->name_len,
le64_to_cpu(de->inode),
- d_type)) {
+ fs_ftype_to_dtype(de->file_type))) {
brelse(bh);
return 0;
}
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 4bf8d5854b27..af2888d23de3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -148,16 +148,24 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
u64 blkno;
struct dentry *parent;
struct inode *dir = d_inode(child);
+ int set;
trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
(unsigned long long)OCFS2_I(dir)->ip_blkno);
+ status = ocfs2_nfs_sync_lock(OCFS2_SB(dir->i_sb), 1);
+ if (status < 0) {
+ mlog(ML_ERROR, "getting nfs sync lock(EX) failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail;
+ }
+
status = ocfs2_inode_lock(dir, NULL, 0);
if (status < 0) {
if (status != -ENOENT)
mlog_errno(status);
parent = ERR_PTR(status);
- goto bail;
+ goto unlock_nfs_sync;
}
status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
@@ -166,11 +174,31 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
goto bail_unlock;
}
+ status = ocfs2_test_inode_bit(OCFS2_SB(dir->i_sb), blkno, &set);
+ if (status < 0) {
+ if (status == -EINVAL) {
+ status = -ESTALE;
+ } else
+ mlog(ML_ERROR, "test inode bit failed %d\n", status);
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
+ trace_ocfs2_get_dentry_test_bit(status, set);
+ if (!set) {
+ status = -ESTALE;
+ parent = ERR_PTR(status);
+ goto bail_unlock;
+ }
+
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
bail_unlock:
ocfs2_inode_unlock(dir, 0);
+unlock_nfs_sync:
+ ocfs2_nfs_sync_unlock(OCFS2_SB(dir->i_sb), 1);
+
bail:
trace_ocfs2_get_parent_end(parent);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7071ad0dec90..b86bf5e74348 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -392,21 +392,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_HB_GLOBAL "heartbeat=global"
/*
- * OCFS2 directory file types. Only the low 3 bits are used. The
- * other bits are reserved for now.
- */
-#define OCFS2_FT_UNKNOWN 0
-#define OCFS2_FT_REG_FILE 1
-#define OCFS2_FT_DIR 2
-#define OCFS2_FT_CHRDEV 3
-#define OCFS2_FT_BLKDEV 4
-#define OCFS2_FT_FIFO 5
-#define OCFS2_FT_SOCK 6
-#define OCFS2_FT_SYMLINK 7
-
-#define OCFS2_FT_MAX 8
-
-/*
* OCFS2_DIR_PAD defines the directory entries boundaries
*
* NOTE: It must be a multiple of 4
@@ -424,17 +409,6 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
#define OCFS2_LINKS_HI_SHIFT 16
#define OCFS2_DX_ENTRIES_MAX (0xffffffffU)
-#define S_SHIFT 12
-static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = OCFS2_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = OCFS2_FT_DIR,
- [S_IFCHR >> S_SHIFT] = OCFS2_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = OCFS2_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = OCFS2_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = OCFS2_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = OCFS2_FT_SYMLINK,
-};
-
/*
* Convenience casts
@@ -1629,7 +1603,7 @@ static inline int ocfs2_sprintf_system_inode_name(char *buf, int len,
static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
umode_t mode)
{
- de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
+ de->file_type = fs_umode_to_ftype(mode);
}
static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index d4811f981608..2bb916d68576 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -269,7 +269,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
/* map the pages */
ret = get_user_pages_fast((unsigned long)user_desc->ptr,
- bufmap->page_count, 1, bufmap->page_array);
+ bufmap->page_count, FOLL_WRITE, bufmap->page_array);
if (ret < 0)
return ret;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 68b3303e4b46..56feaa739979 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -909,14 +909,14 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
return true;
}
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
+int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
int err = 0;
- if (ovl_open_need_copy_up(dentry, file_flags)) {
+ if (ovl_open_need_copy_up(dentry, flags)) {
err = ovl_want_write(dentry);
if (!err) {
- err = ovl_copy_up_flags(dentry, file_flags);
+ err = ovl_copy_up_flags(dentry, flags);
ovl_drop_write(dentry);
}
}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 82c129bfe58d..93872bb50230 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -260,7 +260,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
* hashed directory inode aliases.
*/
inode = ovl_get_inode(dentry->d_sb, &oip);
- if (WARN_ON(IS_ERR(inode)))
+ if (IS_ERR(inode))
return PTR_ERR(inode);
} else {
WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 84dd957efa24..540a8b845145 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -11,6 +11,7 @@
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/uio.h>
+#include <linux/uaccess.h>
#include "overlayfs.h"
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
@@ -29,10 +30,11 @@ static struct file *ovl_open_realfile(const struct file *file,
struct inode *inode = file_inode(file);
struct file *realfile;
const struct cred *old_cred;
+ int flags = file->f_flags | O_NOATIME | FMODE_NONOTIFY;
old_cred = ovl_override_creds(inode->i_sb);
- realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
- realinode, current_cred());
+ realfile = open_with_fake_path(&file->f_path, flags, realinode,
+ current_cred());
revert_creds(old_cred);
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
@@ -50,7 +52,7 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
int err;
/* No atime modificaton on underlying */
- flags |= O_NOATIME;
+ flags |= O_NOATIME | FMODE_NONOTIFY;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
@@ -116,11 +118,10 @@ static int ovl_real_fdget(const struct file *file, struct fd *real)
static int ovl_open(struct inode *inode, struct file *file)
{
- struct dentry *dentry = file_dentry(file);
struct file *realfile;
int err;
- err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+ err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
if (err)
return err;
@@ -145,11 +146,47 @@ static int ovl_release(struct inode *inode, struct file *file)
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
{
- struct inode *realinode = ovl_inode_real(file_inode(file));
+ struct inode *inode = file_inode(file);
+ struct fd real;
+ const struct cred *old_cred;
+ ssize_t ret;
+
+ /*
+ * The two special cases below do not need to involve real fs,
+ * so we can optimize for concurrent callers.
+ */
+ if (offset == 0) {
+ if (whence == SEEK_CUR)
+ return file->f_pos;
+
+ if (whence == SEEK_SET)
+ return vfs_setpos(file, 0, 0);
+ }
+
+ ret = ovl_real_fdget(file, &real);
+ if (ret)
+ return ret;
+
+ /*
+ * Overlay file f_pos is the master copy that is preserved
+ * through copy up and modified on read/write, but only real
+ * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
+ * limitations that are more strict than ->s_maxbytes for specific
+ * files, so we use the real file to perform seeks.
+ */
+ inode_lock(inode);
+ real.file->f_pos = file->f_pos;
+
+ old_cred = ovl_override_creds(inode->i_sb);
+ ret = vfs_llseek(real.file, offset, whence);
+ revert_creds(old_cred);
+
+ file->f_pos = real.file->f_pos;
+ inode_unlock(inode);
+
+ fdput(real);
- return generic_file_llseek_size(file, offset, whence,
- realinode->i_sb->s_maxbytes,
- i_size_read(realinode));
+ return ret;
}
static void ovl_file_accessed(struct file *file)
@@ -372,10 +409,68 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
return ret;
}
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static unsigned int ovl_get_inode_flags(struct inode *inode)
+{
+ unsigned int flags = READ_ONCE(inode->i_flags);
+ unsigned int ovl_iflags = 0;
+
+ if (flags & S_SYNC)
+ ovl_iflags |= FS_SYNC_FL;
+ if (flags & S_APPEND)
+ ovl_iflags |= FS_APPEND_FL;
+ if (flags & S_IMMUTABLE)
+ ovl_iflags |= FS_IMMUTABLE_FL;
+ if (flags & S_NOATIME)
+ ovl_iflags |= FS_NOATIME_FL;
+
+ return ovl_iflags;
+}
+
+static long ovl_ioctl_set_flags(struct file *file, unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
+ unsigned int flags;
+ unsigned int old_flags;
+
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ if (get_user(flags, (int __user *) arg))
+ return -EFAULT;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ /* Check the capability before cred override */
+ ret = -EPERM;
+ old_flags = ovl_get_inode_flags(inode);
+ if (((flags ^ old_flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ goto unlock;
+
+ ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
+ if (ret)
+ goto unlock;
+
+ ret = ovl_real_ioctl(file, FS_IOC_SETFLAGS, arg);
+
+ ovl_copyflags(ovl_inode_real(inode), inode);
+unlock:
+ inode_unlock(inode);
+
+ mnt_drop_write_file(file);
+
+ return ret;
+
+}
+
+static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
switch (cmd) {
case FS_IOC_GETFLAGS:
@@ -383,23 +478,7 @@ static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case FS_IOC_SETFLAGS:
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- ret = ovl_copy_up_with_data(file_dentry(file));
- if (!ret) {
- ret = ovl_real_ioctl(file, cmd, arg);
-
- inode_lock(inode);
- ovl_copyflags(ovl_inode_real(inode), inode);
- inode_unlock(inode);
- }
-
- mnt_drop_write_file(file);
+ ret = ovl_ioctl_set_flags(file, arg);
break;
default:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b7ed5d2279c..b48273e846ad 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -832,7 +832,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
bool is_dir, metacopy = false;
unsigned long ino = 0;
- int err = -ENOMEM;
+ int err = oip->newinode ? -EEXIST : -ENOMEM;
if (!realinode)
realinode = d_inode(lowerdentry);
@@ -917,6 +917,7 @@ out:
return inode;
out_err:
+ pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
inode = ERR_PTR(err);
goto out;
}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 9c6018287d57..d26efed9f80a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -421,7 +421,7 @@ extern const struct file_operations ovl_file_operations;
int ovl_copy_up(struct dentry *dentry);
int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_copy_up_flags(struct dentry *dentry, int flags);
-int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
+int ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b6ccb6c57706..9c8ca6cd3ce4 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -510,7 +510,7 @@ static ssize_t lstats_write(struct file *file, const char __user *buf,
if (!task)
return -ESRCH;
- clear_all_latency_tracing(task);
+ clear_tsk_latency_tracing(task);
put_task_struct(task);
return count;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 95ca1fe7283c..01d4eb0e6bd1 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1169,7 +1169,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
break;
}
- mmu_notifier_range_init(&range, mm, 0, -1UL);
+ mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY,
+ 0, NULL, mm, 0, -1UL);
mmu_notifier_invalidate_range_start(&range);
}
walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fc20e06c56ba..9ad72ea7f71f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -9,7 +9,7 @@
* on the Melbourne quota system as used on BSD derived systems. The internal
* implementation is based on one of the several variants of the LINUX
* inode-subsystem with added complexity of the diskquota system.
- *
+ *
* Author: Marco van Wieringen <mvw@planets.elm.net>
*
* Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96
@@ -51,7 +51,7 @@
* Added journalled quota support, fix lock inversion problems
* Jan Kara, <jack@suse.cz>, 2003,2004
*
- * (C) Copyright 1994 - 1997 Marco van Wieringen
+ * (C) Copyright 1994 - 1997 Marco van Wieringen
*/
#include <linux/errno.h>
@@ -197,7 +197,7 @@ static struct quota_format_type *find_quota_format(int id)
int qm;
spin_unlock(&dq_list_lock);
-
+
for (qm = 0; module_names[qm].qm_fmt_id &&
module_names[qm].qm_fmt_id != id; qm++)
;
@@ -424,10 +424,11 @@ int dquot_acquire(struct dquot *dquot)
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
mutex_lock(&dquot->dq_lock);
- if (!test_bit(DQ_READ_B, &dquot->dq_flags))
+ if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
- if (ret < 0)
- goto out_iolock;
+ if (ret < 0)
+ goto out_iolock;
+ }
/* Make sure flags update is visible after dquot has been filled */
smp_mb__before_atomic();
set_bit(DQ_READ_B, &dquot->dq_flags);
@@ -1049,7 +1050,9 @@ static void remove_dquot_ref(struct super_block *sb, int type,
struct list_head *tofree_head)
{
struct inode *inode;
+#ifdef CONFIG_QUOTA_DEBUG
int reserved = 0;
+#endif
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
@@ -1061,8 +1064,10 @@ static void remove_dquot_ref(struct super_block *sb, int type,
*/
spin_lock(&dq_data_lock);
if (!IS_NOQUOTA(inode)) {
+#ifdef CONFIG_QUOTA_DEBUG
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
+#endif
remove_inode_dquot_ref(inode, type, tofree_head);
}
spin_unlock(&dq_data_lock);
@@ -1663,7 +1668,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
continue;
- if (flags & DQUOT_SPACE_RESERVE) {
+ if (reserve) {
ret = dquot_add_space(dquots[cnt], 0, number, flags,
&warn[cnt]);
} else {
@@ -1676,13 +1681,11 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
if (!dquots[cnt])
continue;
spin_lock(&dquots[cnt]->dq_dqb_lock);
- if (flags & DQUOT_SPACE_RESERVE) {
- dquots[cnt]->dq_dqb.dqb_rsvspace -=
- number;
- } else {
- dquots[cnt]->dq_dqb.dqb_curspace -=
- number;
- }
+ if (reserve)
+ dquot_free_reserved_space(dquots[cnt],
+ number);
+ else
+ dquot_decr_space(dquots[cnt], number);
spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
@@ -1733,7 +1736,7 @@ int dquot_alloc_inode(struct inode *inode)
continue;
/* Back out changes we already did */
spin_lock(&dquots[cnt]->dq_dqb_lock);
- dquots[cnt]->dq_dqb.dqb_curinodes--;
+ dquot_decr_inodes(dquots[cnt], 1);
spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
goto warn_put_all;
@@ -2397,7 +2400,7 @@ out_file_flags:
out_fmt:
put_quota_format(fmt);
- return error;
+ return error;
}
/* Reenable quotas on remount RW */
@@ -2775,7 +2778,7 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state)
struct qc_type_state *tstate;
struct quota_info *dqopt = sb_dqopt(sb);
int type;
-
+
memset(state, 0, sizeof(*state));
for (type = 0; type < MAXQUOTAS; type++) {
if (!sb_has_quota_active(sb, type))
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 7ac5298aba70..9f2b2573b83c 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -127,7 +127,7 @@ static int v1_check_quota_file(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
ulong blocks;
- size_t off;
+ size_t off;
struct v2_disk_dqheader dqhead;
ssize_t size;
loff_t isize;
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a73e5b34db41..3c30034e733f 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -78,7 +78,7 @@ static int v2_check_quota_file(struct super_block *sb, int type)
struct v2_disk_dqheader dqhead;
static const uint quota_magics[] = V2_INITQMAGICS;
static const uint quota_versions[] = V2_INITQVERSIONS;
-
+
if (v2_read_header(sb, type, &dqhead))
return 0;
if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 8a76f9d14bc6..36346dc4cec0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1844,7 +1844,7 @@ static int flush_used_journal_lists(struct super_block *s,
* removes any nodes in table with name block and dev as bh.
* only touchs the hnext and hprev pointers.
*/
-void remove_journal_hash(struct super_block *sb,
+static void remove_journal_hash(struct super_block *sb,
struct reiserfs_journal_cnode **table,
struct reiserfs_journal_list *jl,
unsigned long block, int remove_freed)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 32d8986c26fb..b5b26d8a192c 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -450,6 +450,15 @@ fail:
static inline __u32 xattr_hash(const char *msg, int len)
{
+ /*
+ * csum_partial() gives different results for little-endian and
+ * big-endian hosts. Images created on little-endian hosts and
+ * mounted on big-endian hosts (and vice versa) will see csum mismatches
+ * when trying to fetch xattrs. Treating the hash as __wsum_t would
+ * lower the frequency of mismatch. This is an endianness bug in
+ * reiserfs. The return statement below results in a sparse warning.
+ * Do not silence that warning, so that it stays as a reminder of the
+ * bug.
+ */
return csum_partial(msg, len, 0);
}
diff --git a/fs/sync.c b/fs/sync.c
index 01e82170545a..4d1ff010bc5a 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -292,8 +292,14 @@ int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
}
if (flags & SYNC_FILE_RANGE_WRITE) {
+ int sync_mode = WB_SYNC_NONE;
+
+ if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
+ SYNC_FILE_RANGE_WRITE_AND_WAIT)
+ sync_mode = WB_SYNC_ALL;
+
ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
- WB_SYNC_NONE);
+ sync_mode);
if (ret < 0)
goto out;
}
@@ -306,9 +312,9 @@ out:
}
/*
- * sys_sync_file_range() permits finely controlled syncing over a segment of
+ * ksys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
- * zero then sys_sync_file_range() will operate from offset out to EOF.
+ * zero then ksys_sync_file_range() will operate from offset out to EOF.
*
* The flag bits are:
*
@@ -325,7 +331,7 @@ out:
* Useful combinations of the flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
- * in the range which were dirty on entry to sys_sync_file_range() are placed
+ * in the range which were dirty on entry to ksys_sync_file_range() are placed
* under writeout. This is a start-write-for-data-integrity operation.
*
* SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
@@ -337,10 +343,13 @@ out:
* earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
* for that operation to complete and to return the result.
*
- * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
+ * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
+ * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
* a traditional sync() operation. This is a write-for-data-integrity operation
* which will ensure that all pages in the range which were dirty on entry to
- * sys_sync_file_range() are committed to disk.
+ * ksys_sync_file_range() are written to disk. It should be noted that disk
+ * caches are not flushed by this call, so there are no guarantees here that the
+ * data will be available on disk after a crash.
*
*
* SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index b758004085c4..60f43b93d06e 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -76,7 +76,6 @@ static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
struct shash_desc *inhash)
{
- SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
struct ubifs_auth_node *auth = node;
u8 *hash;
int err;
@@ -85,12 +84,16 @@ int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
if (!hash)
return -ENOMEM;
- hash_desc->tfm = c->hash_tfm;
- ubifs_shash_copy_state(c, inhash, hash_desc);
+ {
+ SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
- err = crypto_shash_final(hash_desc, hash);
- if (err)
- goto out;
+ hash_desc->tfm = c->hash_tfm;
+ ubifs_shash_copy_state(c, inhash, hash_desc);
+
+ err = crypto_shash_final(hash_desc, hash);
+ if (err)
+ goto out;
+ }
err = ubifs_hash_calc_hmac(c, hash, auth->hmac);
if (err)
@@ -143,24 +146,6 @@ struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c)
}
/**
- * __ubifs_shash_final - finalize shash
- * @c: UBIFS file-system description object
- * @desc: the descriptor
- * @out: the output hash
- *
- * Simple wrapper around crypto_shash_final(), safe to be called with
- * disabled authentication.
- */
-int __ubifs_shash_final(const struct ubifs_info *c, struct shash_desc *desc,
- u8 *out)
-{
- if (ubifs_authenticated(c))
- return crypto_shash_final(desc, out);
-
- return 0;
-}
-
-/**
* ubifs_bad_hash - Report hash mismatches
* @c: UBIFS file-system description object
* @node: the node
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index c49ff50fdceb..3a2613038e88 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1603,7 +1603,6 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
err = PTR_ERR(child);
goto out_unlock;
}
- zbr->znode = child;
}
znode = child;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index b73de6d04fa3..1a379b596b0d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -790,16 +790,14 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- if (ubifs_crypt_is_encrypted(dir)) {
- err = fscrypt_get_encryption_info(dir);
- if (err && err != -ENOKEY)
- return err;
- }
-
err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm);
if (err)
return err;
+ err = ubifs_purge_xattrs(inode);
+ if (err)
+ return err;
+
sz_change = CALC_DENT_SIZE(fname_len(&nm));
ubifs_assert(c, inode_is_locked(dir));
@@ -900,16 +898,14 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
if (err)
return err;
- if (ubifs_crypt_is_encrypted(dir)) {
- err = fscrypt_get_encryption_info(dir);
- if (err && err != -ENOKEY)
- return err;
- }
-
err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm);
if (err)
return err;
+ err = ubifs_purge_xattrs(inode);
+ if (err)
+ return err;
+
sz_change = CALC_DENT_SIZE(fname_len(&nm));
err = ubifs_budget_space(c, &req);
@@ -1292,9 +1288,14 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry, old_inode->i_ino, old_dir->i_ino,
new_dentry, new_dir->i_ino, flags);
- if (unlink)
+ if (unlink) {
ubifs_assert(c, inode_is_locked(new_inode));
+ err = ubifs_purge_xattrs(new_inode);
+ if (err)
+ return err;
+ }
+
if (unlink && is_dir) {
err = ubifs_check_dir_empty(new_inode);
if (err)
@@ -1650,9 +1651,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
#endif
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
-#endif
.tmpfile = ubifs_tmpfile,
};
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5d2ffb1a45fc..512e7d9c60cd 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1375,7 +1375,6 @@ static inline int mctime_update_needed(const struct inode *inode,
return 0;
}
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
/**
* ubifs_update_time - update time of inode.
* @inode: inode to update
@@ -1392,6 +1391,9 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
int iflags = I_DIRTY_TIME;
int err, release;
+ if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
+ return generic_update_time(inode, time, flags);
+
err = ubifs_budget_space(c, &req);
if (err)
return err;
@@ -1414,7 +1416,6 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
ubifs_release_budget(c, &req);
return 0;
}
-#endif
/**
* update_mctime - update mtime and ctime of an inode.
@@ -1623,9 +1624,10 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (err)
return err;
vma->vm_ops = &ubifs_file_vm_ops;
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
- file_accessed(file);
-#endif
+
+ if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
+ file_accessed(file);
+
return 0;
}
@@ -1663,9 +1665,7 @@ const struct inode_operations ubifs_file_inode_operations = {
#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
#endif
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
-#endif
};
const struct inode_operations ubifs_symlink_inode_operations = {
@@ -1675,9 +1675,7 @@ const struct inode_operations ubifs_symlink_inode_operations = {
#ifdef CONFIG_UBIFS_FS_XATTR
.listxattr = ubifs_listxattr,
#endif
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
.update_time = ubifs_update_time,
-#endif
};
const struct file_operations ubifs_file_operations = {
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index f9646835b026..5deaae7fcead 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -747,12 +747,6 @@ static int cmp_dirty_idx(const struct ubifs_lprops **a,
return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
}
-static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b,
- int size)
-{
- swap(*a, *b);
-}
-
/**
* ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos.
* @c: the UBIFS file-system description object
@@ -772,8 +766,7 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
sizeof(void *) * c->dirty_idx.cnt);
/* Sort it so that the dirtiest is now at the end */
sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
- (int (*)(const void *, const void *))cmp_dirty_idx,
- (void (*)(void *, void *, int))swap_dirty_idx);
+ (int (*)(const void *, const void *))cmp_dirty_idx, NULL);
dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
if (c->dirty_idx.cnt)
dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 82e4e6a30b04..6b05b3ec500e 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -193,7 +193,6 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return err;
}
case FS_IOC_SET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_FS_ENCRYPTION
struct ubifs_info *c = inode->i_sb->s_fs_info;
err = ubifs_enable_encryption(c);
@@ -201,17 +200,9 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return err;
return fscrypt_ioctl_set_policy(file, (const void __user *)arg);
-#else
- return -EOPNOTSUPP;
-#endif
}
- case FS_IOC_GET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_FS_ENCRYPTION
+ case FS_IOC_GET_ENCRYPTION_POLICY:
return fscrypt_ioctl_get_policy(file, (void __user *)arg);
-#else
- return -EOPNOTSUPP;
-#endif
- }
default:
return -ENOTTY;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 729dc76c83df..74a7306978d0 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -852,10 +852,11 @@ out_free:
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
{
int err, lnum, offs;
- struct ubifs_ino_node *ino;
+ struct ubifs_ino_node *ino, *ino_start;
struct ubifs_inode *ui = ubifs_inode(inode);
- int sync = 0, write_len, ilen = UBIFS_INO_NODE_SZ;
+ int sync = 0, write_len = 0, ilen = UBIFS_INO_NODE_SZ;
int last_reference = !inode->i_nlink;
+ int kill_xattrs = ui->xattr_cnt && last_reference;
u8 hash[UBIFS_HASH_ARR_SZ];
dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
@@ -867,14 +868,16 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
if (!last_reference) {
ilen += ui->data_len;
sync = IS_SYNC(inode);
+ } else if (kill_xattrs) {
+ write_len += UBIFS_INO_NODE_SZ * ui->xattr_cnt;
}
if (ubifs_authenticated(c))
- write_len = ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+ write_len += ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
else
- write_len = ilen;
+ write_len += ilen;
- ino = kmalloc(write_len, GFP_NOFS);
+ ino_start = ino = kmalloc(write_len, GFP_NOFS);
if (!ino)
return -ENOMEM;
@@ -883,12 +886,59 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
if (err)
goto out_free;
+	/*
+	 * The last reference to an inode that still hosts xattrs is going
+	 * away: pack one zero-nlink inode node per xattr inode in front of the
+	 * host inode node, so that they are all written by a single
+	 * write_head() call.
+	 */
+	if (kill_xattrs) {
+		union ubifs_key key;
+		struct fscrypt_name nm = {0};
+		struct inode *xino;
+		struct ubifs_dent_node *xent, *pxent = NULL;
+
+		if (ui->xattr_cnt >= ubifs_xattr_max_cnt(c)) {
+			/*
+			 * err is still 0 here (the reservation succeeded), so
+			 * it must be set explicitly or the caller would be
+			 * told that the inode was written.
+			 */
+			err = -EPERM;
+			ubifs_err(c, "Cannot delete inode, it has too much xattrs!");
+			goto out_release;
+		}
+
+		lowest_xent_key(c, &key, inode->i_ino);
+		while (1) {
+			xent = ubifs_tnc_next_ent(c, &key, &nm);
+			if (IS_ERR(xent)) {
+				err = PTR_ERR(xent);
+				/* -ENOENT marks the end of the xattr entries */
+				if (err == -ENOENT)
+					break;
+
+				/* do not leak the entry of the previous round */
+				kfree(pxent);
+				goto out_release;
+			}
+
+			/*
+			 * nm now points into xent->name, which is why xent is
+			 * only freed (as pxent) after the next lookup.
+			 */
+			fname_name(&nm) = xent->name;
+			fname_len(&nm) = le16_to_cpu(xent->nlen);
+
+			/* inum is stored little-endian, like nlen above */
+			xino = ubifs_iget(c->vfs_sb, le64_to_cpu(xent->inum));
+			if (IS_ERR(xino)) {
+				err = PTR_ERR(xino);
+				ubifs_err(c, "dead directory entry '%s', error %d",
+					  xent->name, err);
+				ubifs_ro_mode(c, err);
+				kfree(pxent);
+				kfree(xent);
+				goto out_release;
+			}
+			ubifs_assert(c, ubifs_inode(xino)->xattr);
+
+			clear_nlink(xino);
+			pack_inode(c, ino, xino, 0);
+			/* advance to the slot of the next inode node */
+			ino = (void *)ino + UBIFS_INO_NODE_SZ;
+			iput(xino);
+
+			kfree(pxent);
+			pxent = xent;
+			key_read(c, &xent->key, &key);
+		}
+		kfree(pxent);
+	}
+
+
pack_inode(c, ino, inode, 1);
err = ubifs_node_calc_hash(c, ino, hash);
if (err)
goto out_release;
- err = write_head(c, BASEHD, ino, write_len, &lnum, &offs, sync);
+ err = write_head(c, BASEHD, ino_start, write_len, &lnum, &offs, sync);
if (err)
goto out_release;
if (!sync)
@@ -903,7 +953,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
if (err)
goto out_ro;
ubifs_delete_orphan(c, inode->i_ino);
- err = ubifs_add_dirt(c, lnum, ilen);
+ err = ubifs_add_dirt(c, lnum, write_len);
} else {
union ubifs_key key;
@@ -917,7 +967,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
spin_lock(&ui->ui_lock);
ui->synced_i_size = ui->ui_size;
spin_unlock(&ui->ui_lock);
- kfree(ino);
+ kfree(ino_start);
return 0;
out_release:
@@ -926,7 +976,7 @@ out_ro:
ubifs_ro_mode(c, err);
finish_reservation(c);
out_free:
- kfree(ino);
+ kfree(ino_start);
return err;
}
@@ -966,8 +1016,8 @@ int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode)
ubifs_assert(c, inode->i_nlink == 0);
- if (ui->del_cmtno != c->cmt_no)
- /* A commit happened for sure */
+ if (ui->xattr_cnt || ui->del_cmtno != c->cmt_no)
+ /* A commit happened for sure or inode hosts xattrs */
return ubifs_jnl_write_inode(c, inode);
down_read(&c->commit_sem);
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 6f87237fdbf4..78a6e97f846e 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -288,6 +288,14 @@ static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
return lnum;
}
+/*
+ * ubifs_xattr_max_cnt - upper bound for the number of xattrs of one inode.
+ * @c: UBIFS file-system description object
+ *
+ * Returns how many inode nodes of UBIFS_INO_NODE_SZ bytes fit into half of a
+ * LEB. The result is asserted to be below @c->max_orphans.
+ */
+static inline int ubifs_xattr_max_cnt(struct ubifs_info *c)
+{
+	int max_xattrs;
+
+	max_xattrs = (c->leb_size / 2) / UBIFS_INO_NODE_SZ;
+	ubifs_assert(c, max_xattrs < c->max_orphans);
+
+	return max_xattrs;
+}
+
const char *ubifs_assert_action_name(struct ubifs_info *c);
#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 8f70494efb0c..2f1618f300fb 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -54,30 +54,24 @@
static int dbg_check_orphans(struct ubifs_info *c);
-/**
- * ubifs_add_orphan - add an orphan.
- * @c: UBIFS file-system description object
- * @inum: orphan inode number
- *
- * Add an orphan. This function is called when an inodes link count drops to
- * zero.
- */
-int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
+static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum,
+ struct ubifs_orphan *parent_orphan)
{
struct ubifs_orphan *orphan, *o;
struct rb_node **p, *parent = NULL;
orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
if (!orphan)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
orphan->inum = inum;
orphan->new = 1;
+ INIT_LIST_HEAD(&orphan->child_list);
spin_lock(&c->orphan_lock);
if (c->tot_orphans >= c->max_orphans) {
spin_unlock(&c->orphan_lock);
kfree(orphan);
- return -ENFILE;
+ return ERR_PTR(-ENFILE);
}
p = &c->orph_tree.rb_node;
while (*p) {
@@ -91,7 +85,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
ubifs_err(c, "orphaned twice");
spin_unlock(&c->orphan_lock);
kfree(orphan);
- return 0;
+ return ERR_PTR(-EINVAL);
}
}
c->tot_orphans += 1;
@@ -100,24 +94,22 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
rb_insert_color(&orphan->rb, &c->orph_tree);
list_add_tail(&orphan->list, &c->orph_list);
list_add_tail(&orphan->new_list, &c->orph_new);
+
+ if (parent_orphan) {
+ list_add_tail(&orphan->child_list,
+ &parent_orphan->child_list);
+ }
+
spin_unlock(&c->orphan_lock);
dbg_gen("ino %lu", (unsigned long)inum);
- return 0;
+ return orphan;
}
-/**
- * ubifs_delete_orphan - delete an orphan.
- * @c: UBIFS file-system description object
- * @inum: orphan inode number
- *
- * Delete an orphan. This function is called when an inode is deleted.
- */
-void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
+static struct ubifs_orphan *lookup_orphan(struct ubifs_info *c, ino_t inum)
{
struct ubifs_orphan *o;
struct rb_node *p;
- spin_lock(&c->orphan_lock);
p = c->orph_tree.rb_node;
while (p) {
o = rb_entry(p, struct ubifs_orphan, rb);
@@ -126,37 +118,124 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
else if (inum > o->inum)
p = p->rb_right;
else {
- if (o->del) {
- spin_unlock(&c->orphan_lock);
- dbg_gen("deleted twice ino %lu",
- (unsigned long)inum);
- return;
- }
- if (o->cmt) {
- o->del = 1;
- o->dnext = c->orph_dnext;
- c->orph_dnext = o;
- spin_unlock(&c->orphan_lock);
- dbg_gen("delete later ino %lu",
- (unsigned long)inum);
- return;
- }
- rb_erase(p, &c->orph_tree);
- list_del(&o->list);
- c->tot_orphans -= 1;
- if (o->new) {
- list_del(&o->new_list);
- c->new_orphans -= 1;
- }
- spin_unlock(&c->orphan_lock);
- kfree(o);
- dbg_gen("inum %lu", (unsigned long)inum);
- return;
+ return o;
}
}
+ return NULL;
+}
+
+/*
+ * Unlink orphan @o from the orphan rb-tree and from the orphan lists of @c,
+ * adjust the orphan counters and free @o.
+ */
+static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o)
+{
+	/* only orphans still flagged as new sit on the new-orphan list */
+	if (o->new) {
+		c->new_orphans--;
+		list_del(&o->new_list);
+	}
+
+	c->tot_orphans--;
+	list_del(&o->list);
+	rb_erase(&o->rb, &c->orph_tree);
+
+	kfree(o);
+}
+
+/*
+ * Delete one orphan according to its own state; the caller holds
+ * c->orphan_lock. An orphan already marked deleted is left alone, an orphan
+ * that is part of a running commit (->cmt) is queued on c->orph_dnext for
+ * deletion later, anything else is dropped right away.
+ */
+static void orphan_delete_one(struct ubifs_info *c, struct ubifs_orphan *orph)
+{
+	if (orph->del) {
+		dbg_gen("deleted twice ino %lu",
+			(unsigned long)orph->inum);
+		return;
+	}
+
+	if (orph->cmt) {
+		orph->del = 1;
+		orph->dnext = c->orph_dnext;
+		c->orph_dnext = orph;
+		dbg_gen("delete later ino %lu",
+			(unsigned long)orph->inum);
+		return;
+	}
+
+	__orphan_drop(c, orph);
+}
+
+/*
+ * Delete the orphan with inode number @inum together with the xattr orphans
+ * linked on its child_list.
+ *
+ * Every child is unlinked from the host and then deleted according to its
+ * own ->cmt/->del state, exactly like the host itself. This way children are
+ * not left behind when the host is deferred because of a running commit, and
+ * a child that is itself queued for commit is not freed early.
+ */
+static void orphan_delete(struct ubifs_info *c, ino_t inum)
+{
+	struct ubifs_orphan *orph, *child_orph, *tmp_o;
+
+	spin_lock(&c->orphan_lock);
+
+	orph = lookup_orphan(c, inum);
+	if (!orph) {
+		spin_unlock(&c->orphan_lock);
+		ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
+		dump_stack();
+
+		return;
+	}
+
+	list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) {
+		list_del(&child_orph->child_list);
+		orphan_delete_one(c, child_orph);
+	}
+
+	orphan_delete_one(c, orph);
+
spin_unlock(&c->orphan_lock);
- ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
- dump_stack();
+}
+
+/**
+ * ubifs_add_orphan - add an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * Add an orphan. This function is called when an inodes link count drops to
+ * zero. The xattr entries of @inum are walked as well and every xattr inode
+ * is added as a child orphan of the new orphan.
+ *
+ * Returns zero on success and a negative error code on failure: whatever
+ * orphan_add() reports, or an error other than -ENOENT from
+ * ubifs_tnc_next_ent().
+ */
+int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
+{
+	int err = 0;
+	ino_t xattr_inum;
+	union ubifs_key key;
+	struct ubifs_dent_node *xent, *pxent = NULL;
+	struct fscrypt_name nm = {0};
+	struct ubifs_orphan *xattr_orphan;
+	struct ubifs_orphan *orphan;
+
+	orphan = orphan_add(c, inum, NULL);
+	if (IS_ERR(orphan))
+		return PTR_ERR(orphan);
+
+	lowest_xent_key(c, &key, inum);
+	while (1) {
+		xent = ubifs_tnc_next_ent(c, &key, &nm);
+		if (IS_ERR(xent)) {
+			err = PTR_ERR(xent);
+			/* -ENOENT marks the end of the xattr entries */
+			if (err == -ENOENT)
+				break;
+			kfree(pxent);
+			return err;
+		}
+
+		/*
+		 * nm now points into xent->name, so xent cannot be freed
+		 * before the next lookup; it is kept as pxent and released one
+		 * iteration later.
+		 */
+		fname_name(&nm) = xent->name;
+		fname_len(&nm) = le16_to_cpu(xent->nlen);
+		xattr_inum = le64_to_cpu(xent->inum);
+
+		xattr_orphan = orphan_add(c, xattr_inum, orphan);
+		if (IS_ERR(xattr_orphan)) {
+			kfree(pxent);
+			kfree(xent);
+			return PTR_ERR(xattr_orphan);
+		}
+
+		kfree(pxent);
+		pxent = xent;
+		key_read(c, &xent->key, &key);
+	}
+	kfree(pxent);
+
+	return 0;
+}
+
+/**
+ * ubifs_delete_orphan - delete an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * Delete an orphan. This function is called when an inode is deleted.
+ * This is only the exported entry point: the work is done by the static
+ * helper orphan_delete(), which takes c->orphan_lock itself.
+ */
+void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
+{
+ orphan_delete(c, inum);
}
/**
@@ -611,10 +690,16 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
for (i = 0; i < n; i++) {
+ union ubifs_key key1, key2;
+
inum = le64_to_cpu(orph->inos[i]);
dbg_rcvry("deleting orphaned inode %lu",
(unsigned long)inum);
- err = ubifs_tnc_remove_ino(c, inum);
+
+ lowest_ino_key(c, &key1, inum);
+ highest_ino_key(c, &key2, inum);
+
+ err = ubifs_tnc_remove_range(c, &key1, &key2);
if (err)
return err;
err = insert_dead_orphan(c, inum);
@@ -744,26 +829,15 @@ struct check_info {
struct rb_root root;
};
-static int dbg_find_orphan(struct ubifs_info *c, ino_t inum)
+static bool dbg_find_orphan(struct ubifs_info *c, ino_t inum)
{
- struct ubifs_orphan *o;
- struct rb_node *p;
+ bool found = false;
spin_lock(&c->orphan_lock);
- p = c->orph_tree.rb_node;
- while (p) {
- o = rb_entry(p, struct ubifs_orphan, rb);
- if (inum < o->inum)
- p = p->rb_left;
- else if (inum > o->inum)
- p = p->rb_right;
- else {
- spin_unlock(&c->orphan_lock);
- return 1;
- }
- }
+ found = !!lookup_orphan(c, inum);
spin_unlock(&c->orphan_lock);
- return 0;
+
+ return found;
}
static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 67fac1e8adfb..2afc8b1d4c3b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -748,14 +748,12 @@ int ubifs_read_superblock(struct ubifs_info *c)
goto out;
}
-#ifndef CONFIG_FS_ENCRYPTION
- if (c->encrypted) {
+ if (!IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION) && c->encrypted) {
ubifs_err(c, "file system contains encrypted files but UBIFS"
" was built without crypto support.");
err = -EINVAL;
goto out;
}
-#endif
/* Automatically increase file system size to the maximum size */
c->old_leb_cnt = c->leb_cnt;
@@ -943,6 +941,9 @@ int ubifs_enable_encryption(struct ubifs_info *c)
int err;
struct ubifs_sb_node *sup = c->sup_node;
+ if (!IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION))
+ return -EOPNOTSUPP;
+
if (c->encrypted)
return 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 632f02d4d660..04b8ecfd3470 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -129,9 +129,10 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
goto out_ino;
inode->i_flags |= S_NOCMTIME;
-#ifndef CONFIG_UBIFS_ATIME_SUPPORT
- inode->i_flags |= S_NOATIME;
-#endif
+
+ if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
+ inode->i_flags |= S_NOATIME;
+
set_nlink(inode, le32_to_cpu(ino->nlink));
i_uid_write(inode, le32_to_cpu(ino->uid));
i_gid_write(inode, le32_to_cpu(ino->gid));
@@ -1545,6 +1546,8 @@ static int mount_ubifs(struct ubifs_info *c)
c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
dbg_gen("max. seq. number: %llu", c->max_sqnum);
dbg_gen("commit number: %llu", c->cmt_no);
+ dbg_gen("max. xattrs per inode: %d", ubifs_xattr_max_cnt(c));
+ dbg_gen("max orphans: %d", c->max_orphans);
return 0;
@@ -2141,9 +2144,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_UBIFS_FS_XATTR
sb->s_xattr = ubifs_xattr_handlers;
#endif
-#ifdef CONFIG_FS_ENCRYPTION
- sb->s_cop = &ubifs_crypt_operations;
-#endif
+ fscrypt_set_ops(sb, &ubifs_crypt_operations);
mutex_lock(&c->umount_mutex);
err = mount_ubifs(c);
@@ -2245,11 +2246,10 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
goto out_deact;
/* We do not support atime */
sb->s_flags |= SB_ACTIVE;
-#ifndef CONFIG_UBIFS_ATIME_SUPPORT
- sb->s_flags |= SB_NOATIME;
-#else
- ubifs_msg(c, "full atime support is enabled.");
-#endif
+ if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
+ ubifs_msg(c, "full atime support is enabled.");
+ else
+ sb->s_flags |= SB_NOATIME;
}
/* 'fill_super()' opens ubi again so we must close it here */
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 25572ffea163..ebf8c26f5b22 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -479,14 +479,13 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
if (node_len != len)
return 0;
- if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
- !c->remounting_rw)
- return 1;
-
- crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
- node_crc = le32_to_cpu(ch->crc);
- if (crc != node_crc)
- return 0;
+ if (type != UBIFS_DATA_NODE || !c->no_chk_data_crc || c->mounting ||
+ c->remounting_rw) {
+ crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
+ node_crc = le32_to_cpu(ch->crc);
+ if (crc != node_crc)
+ return 0;
+ }
err = ubifs_node_check_hash(c, buf, zbr->hash);
if (err) {
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1ae12900e01d..379b9f791ff6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -924,6 +924,8 @@ struct ubifs_budget_req {
* @rb: rb-tree node of rb-tree of orphans sorted by inode number
* @list: list head of list of orphans in order added
* @new_list: list head of list of orphans added since the last commit
+ * @child_list: list head of the xattr child orphans if this orphan hosts
+ *              xattrs, list entry on the host's child list if this orphan
+ *              is an xattr, not used otherwise.
* @cnext: next orphan to commit
* @dnext: next orphan to delete
* @inum: inode number
@@ -935,6 +937,7 @@ struct ubifs_orphan {
struct rb_node rb;
struct list_head list;
struct list_head new_list;
+ struct list_head child_list;
struct ubifs_orphan *cnext;
struct ubifs_orphan *dnext;
ino_t inum;
@@ -1996,9 +1999,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
-#ifdef CONFIG_UBIFS_ATIME_SUPPORT
int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
-#endif
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
@@ -2014,6 +2015,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
size_t size, int flags, bool check_lock);
ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
size_t size);
+int ubifs_purge_xattrs(struct inode *host);
#ifdef CONFIG_UBIFS_FS_XATTR
void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index f5ad1ede7990..acab3181ab35 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -61,12 +61,6 @@
#include <linux/xattr.h>
/*
- * Limit the number of extended attributes per inode so that the total size
- * (@xattr_size) is guaranteeded to fit in an 'unsigned int'.
- */
-#define MAX_XATTRS_PER_INODE 65535
-
-/*
* Extended attribute type constants.
*
* USER_XATTR: user extended attribute ("user.*")
@@ -106,7 +100,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
.new_ino_d = ALIGN(size, 8), .dirtied_ino = 1,
.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
- if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) {
+ if (host_ui->xattr_cnt >= ubifs_xattr_max_cnt(c)) {
ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more",
host->i_ino, host_ui->xattr_cnt);
return -ENOSPC;
@@ -507,6 +501,81 @@ out_cancel:
return err;
}
+/**
+ * ubifs_purge_xattrs - remove the extended attributes of a host inode.
+ * @host: host inode
+ *
+ * Does nothing while @host has fewer than 'ubifs_xattr_max_cnt()' xattrs.
+ * Otherwise every xattr entry of @host is looked up and removed one by one,
+ * which is not atomic. Returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+int ubifs_purge_xattrs(struct inode *host)
+{
+	union ubifs_key key;
+	struct ubifs_info *c = host->i_sb->s_fs_info;
+	struct ubifs_dent_node *xent, *pxent = NULL;
+	struct inode *xino;
+	struct fscrypt_name nm = {0};
+	int err;
+
+	if (ubifs_inode(host)->xattr_cnt < ubifs_xattr_max_cnt(c))
+		return 0;
+
+	ubifs_warn(c, "inode %lu has too many xattrs, doing a non-atomic deletion",
+		   host->i_ino);
+
+	lowest_xent_key(c, &key, host->i_ino);
+	while (1) {
+		xent = ubifs_tnc_next_ent(c, &key, &nm);
+		if (IS_ERR(xent)) {
+			err = PTR_ERR(xent);
+			break;
+		}
+
+		fname_name(&nm) = xent->name;
+		fname_len(&nm) = le16_to_cpu(xent->nlen);
+
+		/* Like @nlen, @inum is an on-media field: convert it first */
+		xino = ubifs_iget(c->vfs_sb, le64_to_cpu(xent->inum));
+		if (IS_ERR(xino)) {
+			err = PTR_ERR(xino);
+			ubifs_err(c, "dead directory entry '%s', error %d",
+				  xent->name, err);
+			ubifs_ro_mode(c, err);
+			goto out_free;
+		}
+
+		ubifs_assert(c, ubifs_inode(xino)->xattr);
+
+		clear_nlink(xino);
+		err = remove_xattr(c, host, xino, &nm);
+		iput(xino);
+		if (err) {
+			ubifs_err(c, "cannot remove xattr, error %d", err);
+			goto out_free;
+		}
+
+		kfree(pxent);
+		pxent = xent;
+		key_read(c, &xent->key, &key);
+	}
+
+	kfree(pxent);
+	if (err != -ENOENT) {
+		ubifs_err(c, "cannot find next direntry, error %d", err);
+		return err;
+	}
+
+	return 0;
+
+out_free:
+	/* Neither the current nor the previous entry has been freed yet */
+	kfree(xent);
+	kfree(pxent);
+	return err;
+}
+
/**
* ubifs_evict_xattr_inode - Evict an xattr inode.
* @c: UBIFS file-system description object
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 58cc2414992b..77b6d89b9bcd 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -304,21 +304,6 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
if (dentry->d_name.len > UDF_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
-#ifdef UDF_RECOVERY
- /* temporary shorthand for specifying files by inode number */
- if (!strncmp(dentry->d_name.name, ".B=", 3)) {
- struct kernel_lb_addr lb = {
- .logicalBlockNum = 0,
- .partitionReferenceNum =
- simple_strtoul(dentry->d_name.name + 3,
- NULL, 0),
- };
- inode = udf_iget(dir->i_sb, lb);
- if (IS_ERR(inode))
- return inode;
- } else
-#endif /* UDF_RECOVERY */
-
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
if (IS_ERR(fi))
return ERR_CAST(fi);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f64691f2168a..a14346137361 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -566,6 +566,11 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
if (!remount) {
if (uopt->nls_map)
unload_nls(uopt->nls_map);
+ /*
+ * load_nls() failure is handled later in
+ * udf_fill_super() after all options are
+ * parsed.
+ */
uopt->nls_map = load_nls(args[0].from);
uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f5de1e726356..3b30301c90ec 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -30,6 +30,8 @@
#include <linux/security.h>
#include <linux/hugetlb.h>
+int sysctl_unprivileged_userfaultfd __read_mostly = 1;
+
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
enum userfaultfd_state {
@@ -1930,6 +1932,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
struct userfaultfd_ctx *ctx;
int fd;
+ if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
+ return -EPERM;
+
BUG_ON(!current->mm);
/* Check the UFFD_* constants for consistency. */