diff options
Diffstat (limited to 'fs/afs')
41 files changed, 6727 insertions, 5987 deletions
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index 3fb1f559e317..fc8ba9142f2f 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -4,11 +4,12 @@ config AFS_FS depends on INET select AF_RXRPC select DNS_RESOLVER + select NETFS_SUPPORT help If you say Y here, you will get an experimental Andrew File System driver. It currently only supports unsecured read-only AFS access. - See <file:Documentation/filesystems/afs.txt> for more information. + See <file:Documentation/filesystems/afs.rst> for more information. If unsure, say N. @@ -18,7 +19,7 @@ config AFS_DEBUG help Say Y here to make runtime controllable debugging messages appear. - See <file:Documentation/filesystems/afs.txt> for more information. + See <file:Documentation/filesystems/afs.rst> for more information. If unsure, say N. @@ -37,6 +38,6 @@ config AFS_DEBUG_CURSOR the dmesg log if the server rotation algorithm fails to successfully contact a server. - See <file:Documentation/filesystems/afs.txt> for more information. + See <file:Documentation/filesystems/afs.rst> for more information. If unsure, say N. diff --git a/fs/afs/Makefile b/fs/afs/Makefile index 10359bea7070..e8956b65d7ff 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile @@ -3,10 +3,7 @@ # Makefile for Red Hat Linux AFS client. # -afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o - kafs-y := \ - $(afs-cache-y) \ addr_list.o \ callback.o \ cell.o \ @@ -18,6 +15,7 @@ kafs-y := \ file.o \ flock.o \ fsclient.o \ + fs_operation.o \ fs_probe.o \ inode.o \ main.o \ @@ -30,6 +28,7 @@ kafs-y := \ server_list.o \ super.o \ vlclient.o \ + vl_alias.o \ vl_list.o \ vl_probe.o \ vl_rotate.o \ diff --git a/fs/afs/afs.h b/fs/afs/afs.h index b6d49d646ade..432cb4b23961 100644 --- a/fs/afs/afs.h +++ b/fs/afs/afs.h @@ -10,7 +10,7 @@ #include <linux/in.h> -#define AFS_MAXCELLNAME 64 /* Maximum length of a cell name */ +#define AFS_MAXCELLNAME 256 /* Maximum length of a cell name */ #define AFS_MAXVOLNAME 64 /* Maximum length of a volume name */ #define AFS_MAXNSERVERS 8 /* Maximum servers in a basic volume record */ #define AFS_NMAXNSERVERS 13 /* Maximum servers in a N/U-class volume record */ @@ -146,7 +146,6 @@ struct afs_file_status { struct afs_status_cb { struct afs_file_status status; struct afs_callback callback; - unsigned int cb_break; /* Pre-op callback break counter */ bool have_status; /* True if status record was retrieved */ bool have_cb; /* True if cb record was retrieved */ bool have_error; /* True if status.abort_code indicates an error */ diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h index e9b8029920ec..9c65ffb8a523 100644 --- a/fs/afs/afs_vl.h +++ b/fs/afs/afs_vl.h @@ -22,6 +22,7 @@ enum AFSVL_Operations { VLGETENTRYBYNAMEU = 527, /* AFS Get VLDB entry by name (UUID-variant) */ VLGETADDRSU = 533, /* AFS Get addrs for fileserver */ YVLGETENDPOINTS = 64002, /* YFS Get endpoints for file/volume server */ + YVLGETCELLNAME = 64014, /* YFS Get actual cell name */ VLGETCAPABILITIES = 65537, /* AFS Get server capabilities */ }; diff --git a/fs/afs/cache.c b/fs/afs/cache.c deleted file mode 100644 index 037af93e3aba..000000000000 --- a/fs/afs/cache.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* AFS caching stuff - * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - */ - -#include <linux/sched.h> -#include "internal.h" - -static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen, - loff_t object_size); - -struct fscache_netfs afs_cache_netfs = { - .name = "afs", - .version = 2, -}; - -struct fscache_cookie_def afs_cell_cache_index_def = { - .name = "AFS.cell", - .type = FSCACHE_COOKIE_TYPE_INDEX, -}; - -struct fscache_cookie_def afs_volume_cache_index_def = { - .name = "AFS.volume", - .type = FSCACHE_COOKIE_TYPE_INDEX, -}; - -struct fscache_cookie_def afs_vnode_cache_index_def = { - .name = "AFS.vnode", - .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .check_aux = afs_vnode_cache_check_aux, -}; - -/* - * check that the auxiliary data indicates that the entry is still valid - */ -static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, - const void *buffer, - uint16_t buflen, - loff_t object_size) -{ - struct afs_vnode *vnode = cookie_netfs_data; - struct afs_vnode_cache_aux aux; - - _enter("{%llx,%x,%llx},%p,%u", - vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, - buffer, buflen); - - memcpy(&aux, buffer, sizeof(aux)); - - /* check the size of the data is what we're expecting */ - if (buflen != sizeof(aux)) { - _leave(" = OBSOLETE [len %hx != %zx]", buflen, sizeof(aux)); - return FSCACHE_CHECKAUX_OBSOLETE; - } - - if (vnode->status.data_version != aux.data_version) { - _leave(" = OBSOLETE [vers %llx != %llx]", - aux.data_version, vnode->status.data_version); - return FSCACHE_CHECKAUX_OBSOLETE; - } - - _leave(" = SUCCESS"); - return FSCACHE_CHECKAUX_OKAY; -} diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 2dca8df1a18d..a484fa642808 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -21,192 +21,51 @@ #include "internal.h" /* - * Create volume and callback interests on a server. + * Handle invalidation of an mmap'd file. We invalidate all the PTEs referring + * to the pages in this file's pagecache, forcing the kernel to go through + * ->fault() or ->page_mkwrite() - at which point we can handle invalidation + * more fully. */ -static struct afs_cb_interest *afs_create_interest(struct afs_server *server, - struct afs_vnode *vnode) +void afs_invalidate_mmap_work(struct work_struct *work) { - struct afs_vol_interest *new_vi, *vi; - struct afs_cb_interest *new; - struct hlist_node **pp; - - new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL); - if (!new_vi) - return NULL; - - new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL); - if (!new) { - kfree(new_vi); - return NULL; - } - - new_vi->usage = 1; - new_vi->vid = vnode->volume->vid; - INIT_HLIST_NODE(&new_vi->srv_link); - INIT_HLIST_HEAD(&new_vi->cb_interests); - - refcount_set(&new->usage, 1); - new->sb = vnode->vfs_inode.i_sb; - new->vid = vnode->volume->vid; - new->server = afs_get_server(server, afs_server_trace_get_new_cbi); - INIT_HLIST_NODE(&new->cb_vlink); - - write_lock(&server->cb_break_lock); - - for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) { - vi = hlist_entry(*pp, struct afs_vol_interest, srv_link); - if (vi->vid < new_vi->vid) - continue; - if (vi->vid > new_vi->vid) - break; - vi->usage++; - goto found_vi; - } + struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work); - new_vi->srv_link.pprev = pp; - new_vi->srv_link.next = *pp; - if (*pp) - (*pp)->pprev = &new_vi->srv_link.next; - *pp = &new_vi->srv_link; - vi = new_vi; - new_vi = NULL; -found_vi: - - new->vol_interest = vi; - hlist_add_head(&new->cb_vlink, &vi->cb_interests); - - write_unlock(&server->cb_break_lock); - kfree(new_vi); - return new; + unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false); } -/* - * Set up an interest-in-callbacks record for a volume on a server and - * register it with the server. - * - Called with vnode->io_lock held. - */ -int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_server_list *slist, - unsigned int index) +void afs_server_init_callback_work(struct work_struct *work) { - struct afs_server_entry *entry = &slist->servers[index]; - struct afs_cb_interest *cbi, *vcbi, *new, *old; - struct afs_server *server = entry->server; - -again: - vcbi = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->io_lock)); - if (vcbi && likely(vcbi == entry->cb_interest)) - return 0; - - read_lock(&slist->lock); - cbi = afs_get_cb_interest(entry->cb_interest); - read_unlock(&slist->lock); - - if (vcbi) { - if (vcbi == cbi) { - afs_put_cb_interest(afs_v2net(vnode), cbi); - return 0; - } - - /* Use a new interest in the server list for the same server - * rather than an old one that's still attached to a vnode. - */ - if (cbi && vcbi->server == cbi->server) { - write_seqlock(&vnode->cb_lock); - old = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->cb_lock.lock)); - rcu_assign_pointer(vnode->cb_interest, cbi); - write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), old); - return 0; - } - - /* Re-use the one attached to the vnode. */ - if (!cbi && vcbi->server == server) { - write_lock(&slist->lock); - if (entry->cb_interest) { - write_unlock(&slist->lock); - afs_put_cb_interest(afs_v2net(vnode), cbi); - goto again; - } - - entry->cb_interest = cbi; - write_unlock(&slist->lock); - return 0; - } - } + struct afs_server *server = container_of(work, struct afs_server, initcb_work); + struct afs_vnode *vnode; + struct afs_cell *cell = server->cell; - if (!cbi) { - new = afs_create_interest(server, vnode); - if (!new) - return -ENOMEM; + down_read(&cell->fs_open_mmaps_lock); - write_lock(&slist->lock); - if (!entry->cb_interest) { - entry->cb_interest = afs_get_cb_interest(new); - cbi = new; - new = NULL; - } else { - cbi = afs_get_cb_interest(entry->cb_interest); + list_for_each_entry(vnode, &cell->fs_open_mmaps, cb_mmap_link) { + if (vnode->cb_server == server) { + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + queue_work(system_unbound_wq, &vnode->cb_work); } - write_unlock(&slist->lock); - afs_put_cb_interest(afs_v2net(vnode), new); } - ASSERT(cbi); - - /* Change the server the vnode is using. This entails scrubbing any - * interest the vnode had in the previous server it was using. - */ - write_seqlock(&vnode->cb_lock); - - old = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->cb_lock.lock)); - rcu_assign_pointer(vnode->cb_interest, cbi); - vnode->cb_s_break = cbi->server->cb_s_break; - vnode->cb_v_break = vnode->volume->cb_v_break; - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - - write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), old); - return 0; -} - -/* - * Remove an interest on a server. - */ -void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi) -{ - struct afs_vol_interest *vi; - - if (cbi && refcount_dec_and_test(&cbi->usage)) { - if (!hlist_unhashed(&cbi->cb_vlink)) { - write_lock(&cbi->server->cb_break_lock); - - hlist_del_init(&cbi->cb_vlink); - vi = cbi->vol_interest; - cbi->vol_interest = NULL; - if (--vi->usage == 0) - hlist_del(&vi->srv_link); - else - vi = NULL; - - write_unlock(&cbi->server->cb_break_lock); - if (vi) - kfree_rcu(vi, rcu); - afs_put_server(net, cbi->server, afs_server_trace_put_cbi); - } - kfree_rcu(cbi, rcu); - } + up_read(&cell->fs_open_mmaps_lock); } /* - * allow the fileserver to request callback state (re-)initialisation + * Allow the fileserver to request callback state (re-)initialisation. + * Unfortunately, UUIDs are not guaranteed unique. */ void afs_init_callback_state(struct afs_server *server) { - server->cb_s_break++; + rcu_read_lock(); + do { + server->cb_s_break++; + atomic_inc(&server->cell->fs_s_break); + if (!list_empty(&server->cell->fs_open_mmaps)) + queue_work(system_unbound_wq, &server->initcb_work); + + } while ((server = rcu_dereference(server->uuid_next))); + rcu_read_unlock(); } /* @@ -219,11 +78,17 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { vnode->cb_break++; + vnode->cb_v_break = vnode->volume->cb_v_break; afs_clear_permits(vnode); if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB) afs_lock_may_be_available(vnode); + if (reason != afs_cb_break_for_deleted && + vnode->status.type == AFS_FTYPE_FILE && + atomic_read(&vnode->cb_nr_mmap)) + queue_work(system_unbound_wq, &vnode->cb_work); + trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true); } else { trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false); @@ -238,69 +103,109 @@ void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason } /* + * Look up a volume by volume ID under RCU conditions. + */ +static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell, + afs_volid_t vid) +{ + struct afs_volume *volume = NULL; + struct rb_node *p; + int seq = 0; + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + read_seqbegin_or_lock(&cell->volume_lock, &seq); + + p = rcu_dereference_raw(cell->volumes.rb_node); + while (p) { + volume = rb_entry(p, struct afs_volume, cell_node); + + if (volume->vid < vid) + p = rcu_dereference_raw(p->rb_left); + else if (volume->vid > vid) + p = rcu_dereference_raw(p->rb_right); + else + break; + volume = NULL; + } + + } while (need_seqretry(&cell->volume_lock, seq)); + + done_seqretry(&cell->volume_lock, seq); + return volume; +} + +/* * allow the fileserver to explicitly break one callback * - happens when * - the backing file is changed * - a lock is released */ -static void afs_break_one_callback(struct afs_server *server, +static void afs_break_one_callback(struct afs_volume *volume, struct afs_fid *fid) { - struct afs_vol_interest *vi; - struct afs_cb_interest *cbi; - struct afs_iget_data data; + struct super_block *sb; struct afs_vnode *vnode; struct inode *inode; - read_lock(&server->cb_break_lock); - hlist_for_each_entry(vi, &server->cb_volumes, srv_link) { - if (vi->vid < fid->vid) - continue; - if (vi->vid > fid->vid) { - vi = NULL; - break; - } - //atomic_inc(&vi->usage); - break; + if (fid->vnode == 0 && fid->unique == 0) { + /* The callback break applies to an entire volume. */ + write_lock(&volume->cb_v_break_lock); + volume->cb_v_break++; + trace_afs_cb_break(fid, volume->cb_v_break, + afs_cb_break_for_volume_callback, false); + write_unlock(&volume->cb_v_break_lock); + return; } + /* See if we can find a matching inode - even an I_NEW inode needs to + * be marked as it can have its callback broken before we finish + * setting up the local inode. + */ + sb = rcu_dereference(volume->sb); + if (!sb) + return; + + inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode, afs_cb_break_for_callback); + } else { + trace_afs_cb_miss(fid, afs_cb_break_for_callback); + } +} + +static void afs_break_some_callbacks(struct afs_server *server, + struct afs_callback_break *cbb, + size_t *_count) +{ + struct afs_callback_break *residue = cbb; + struct afs_volume *volume; + afs_volid_t vid = cbb->fid.vid; + size_t i; + + volume = afs_lookup_volume_rcu(server->cell, vid); + /* TODO: Find all matching volumes if we couldn't match the server and * break them anyway. */ - if (!vi) - goto out; - /* Step through all interested superblocks. There may be more than one - * because of cell aliasing. - */ - hlist_for_each_entry(cbi, &vi->cb_interests, cb_vlink) { - if (fid->vnode == 0 && fid->unique == 0) { - /* The callback break applies to an entire volume. */ - struct afs_super_info *as = AFS_FS_S(cbi->sb); - struct afs_volume *volume = as->volume; - - write_lock(&volume->cb_v_break_lock); - volume->cb_v_break++; - trace_afs_cb_break(fid, volume->cb_v_break, - afs_cb_break_for_volume_callback, false); - write_unlock(&volume->cb_v_break_lock); + for (i = *_count; i > 0; cbb++, i--) { + if (cbb->fid.vid == vid) { + _debug("- Fid { vl=%08llx n=%llu u=%u }", + cbb->fid.vid, + cbb->fid.vnode, + cbb->fid.unique); + --*_count; + if (volume) + afs_break_one_callback(volume, &cbb->fid); } else { - data.volume = NULL; - data.fid = *fid; - inode = ilookup5_nowait(cbi->sb, fid->vnode, - afs_iget5_test, &data); - if (inode) { - vnode = AFS_FS_I(inode); - afs_break_callback(vnode, afs_cb_break_for_callback); - iput(inode); - } else { - trace_afs_cb_miss(fid, afs_cb_break_for_callback); - } + *residue++ = *cbb; } } - -out: - read_unlock(&server->cb_break_lock); } /* @@ -313,29 +218,11 @@ void afs_break_callbacks(struct afs_server *server, size_t count, ASSERT(server != NULL); - /* TODO: Sort the callback break list by volume ID */ + rcu_read_lock(); - for (; count > 0; callbacks++, count--) { - _debug("- Fid { vl=%08llx n=%llu u=%u }", - callbacks->fid.vid, - callbacks->fid.vnode, - callbacks->fid.unique); - afs_break_one_callback(server, &callbacks->fid); - } + while (count > 0) + afs_break_some_callbacks(server, callbacks, &count); - _leave(""); + rcu_read_unlock(); return; } - -/* - * Clear the callback interests in a server list. - */ -void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist) -{ - int i; - - for (i = 0; i < slist->nr_servers; i++) { - afs_put_cb_interest(net, slist->servers[i].cb_interest); - slist->servers[i].cb_interest = NULL; - } -} diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 78ba5f932287..988c2ac7cece 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -18,8 +18,10 @@ static unsigned __read_mostly afs_cell_gc_delay = 10; static unsigned __read_mostly afs_cell_min_ttl = 10 * 60; static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60; +static atomic_t cell_debug_id; -static void afs_manage_cell(struct work_struct *); +static void afs_queue_cell_manager(struct afs_net *); +static void afs_manage_cell_work(struct work_struct *); static void afs_dec_cells_outstanding(struct afs_net *net) { @@ -37,19 +39,22 @@ static void afs_set_cell_timer(struct afs_net *net, time64_t delay) atomic_inc(&net->cells_outstanding); if (timer_reduce(&net->cells_timer, jiffies + delay * HZ)) afs_dec_cells_outstanding(net); + } else { + afs_queue_cell_manager(net); } } /* - * Look up and get an activation reference on a cell record under RCU - * conditions. The caller must hold the RCU read lock. + * Look up and get an activation reference on a cell record. The caller must + * hold net->cells_lock at least read-locked. */ -struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, - const char *name, unsigned int namesz) +static struct afs_cell *afs_find_cell_locked(struct afs_net *net, + const char *name, unsigned int namesz, + enum afs_cell_trace reason) { struct afs_cell *cell = NULL; struct rb_node *p; - int n, seq = 0, ret = 0; + int n; _enter("%*.*s", namesz, namesz, name); @@ -58,61 +63,48 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net, if (namesz > AFS_MAXCELLNAME) return ERR_PTR(-ENAMETOOLONG); - do { - /* Unfortunately, rbtree walking doesn't give reliable results - * under just the RCU read lock, so we have to check for - * changes. - */ - if (cell) - afs_put_cell(net, cell); - cell = NULL; - ret = -ENOENT; - - read_seqbegin_or_lock(&net->cells_lock, &seq); - - if (!name) { - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - afs_get_cell(cell); - ret = 0; - break; - } - ret = -EDESTADDRREQ; - continue; - } + if (!name) { + cell = net->ws_cell; + if (!cell) + return ERR_PTR(-EDESTADDRREQ); + goto found; + } - p = rcu_dereference_raw(net->cells.rb_node); - while (p) { - cell = rb_entry(p, struct afs_cell, net_node); - - n = strncasecmp(cell->name, name, - min_t(size_t, cell->name_len, namesz)); - if (n == 0) - n = cell->name_len - namesz; - if (n < 0) { - p = rcu_dereference_raw(p->rb_left); - } else if (n > 0) { - p = rcu_dereference_raw(p->rb_right); - } else { - if (atomic_inc_not_zero(&cell->usage)) { - ret = 0; - break; - } - /* We want to repeat the search, this time with - * the lock properly locked. - */ - } - cell = NULL; - } + p = net->cells.rb_node; + while (p) { + cell = rb_entry(p, struct afs_cell, net_node); - } while (need_seqretry(&net->cells_lock, seq)); + n = strncasecmp(cell->name, name, + min_t(size_t, cell->name_len, namesz)); + if (n == 0) + n = cell->name_len - namesz; + if (n < 0) + p = p->rb_left; + else if (n > 0) + p = p->rb_right; + else + goto found; + } - done_seqretry(&net->cells_lock, seq); + return ERR_PTR(-ENOENT); - if (ret != 0 && cell) - afs_put_cell(net, cell); +found: + return afs_use_cell(cell, reason); +} - return ret == 0 ? cell : ERR_PTR(ret); +/* + * Look up and get an activation reference on a cell record. + */ +struct afs_cell *afs_find_cell(struct afs_net *net, + const char *name, unsigned int namesz, + enum afs_cell_trace reason) +{ + struct afs_cell *cell; + + down_read(&net->cells_lock); + cell = afs_find_cell_locked(net, name, namesz, reason); + up_read(&net->cells_lock); + return cell; } /* @@ -154,16 +146,30 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, return ERR_PTR(-ENOMEM); } + cell->name = kmalloc(namelen + 1, GFP_KERNEL); + if (!cell->name) { + kfree(cell); + return ERR_PTR(-ENOMEM); + } + cell->net = net; cell->name_len = namelen; for (i = 0; i < namelen; i++) cell->name[i] = tolower(name[i]); - - atomic_set(&cell->usage, 2); - INIT_WORK(&cell->manager, afs_manage_cell); - INIT_LIST_HEAD(&cell->proc_volumes); - rwlock_init(&cell->proc_lock); + cell->name[i] = 0; + + refcount_set(&cell->ref, 1); + atomic_set(&cell->active, 0); + INIT_WORK(&cell->manager, afs_manage_cell_work); + cell->volumes = RB_ROOT; + INIT_HLIST_HEAD(&cell->proc_volumes); + seqlock_init(&cell->volume_lock); + cell->fs_servers = RB_ROOT; + seqlock_init(&cell->fs_lock); + INIT_LIST_HEAD(&cell->fs_open_mmaps); + init_rwsem(&cell->fs_open_mmaps_lock); rwlock_init(&cell->vl_servers_lock); + cell->flags = (1 << AFS_CELL_FL_CHECK_ALIAS); /* Provide a VL server list, filling it in if we were given a list of * addresses to use. @@ -195,6 +201,9 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, cell->dns_source = vllist->source; cell->dns_status = vllist->status; smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */ + atomic_inc(&net->cells_outstanding); + cell->debug_id = atomic_inc_return(&cell_debug_id); + trace_afs_cell(cell->debug_id, 1, 0, afs_cell_trace_alloc); _leave(" = %p", cell); return cell; @@ -203,6 +212,7 @@ parse_failed: if (ret == -EINVAL) printk(KERN_ERR "kAFS: bad VL server IP address\n"); error: + kfree(cell->name); kfree(cell); _leave(" = %d", ret); return ERR_PTR(ret); @@ -233,9 +243,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, _enter("%s,%s", name, vllist); if (!excl) { - rcu_read_lock(); - cell = afs_lookup_cell_rcu(net, name, namesz); - rcu_read_unlock(); + cell = afs_find_cell(net, name, namesz, afs_cell_trace_use_lookup); if (!IS_ERR(cell)) goto wait_for_cell; } @@ -256,7 +264,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, /* Find the insertion point and check to see if someone else added a * cell whilst we were allocating. */ - write_seqlock(&net->cells_lock); + down_write(&net->cells_lock); pp = &net->cells.rb_node; parent = NULL; @@ -278,23 +286,26 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, cell = candidate; candidate = NULL; + atomic_set(&cell->active, 2); + trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 2, afs_cell_trace_insert); rb_link_node_rcu(&cell->net_node, parent, pp); rb_insert_color(&cell->net_node, &net->cells); - atomic_inc(&net->cells_outstanding); - write_sequnlock(&net->cells_lock); + up_write(&net->cells_lock); - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell, afs_cell_trace_get_queue_new); wait_for_cell: + trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), atomic_read(&cell->active), + afs_cell_trace_wait); _debug("wait_for_cell"); wait_var_event(&cell->state, ({ state = smp_load_acquire(&cell->state); /* vs error */ - state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED; + state == AFS_CELL_ACTIVE || state == AFS_CELL_REMOVED; })); /* Check the state obtained from the wait check. */ - if (state == AFS_CELL_FAILED) { + if (state == AFS_CELL_REMOVED) { ret = cell->error; goto error; } @@ -308,16 +319,17 @@ cell_already_exists: if (excl) { ret = -EEXIST; } else { - afs_get_cell(cursor); + afs_use_cell(cursor, afs_cell_trace_use_lookup); ret = 0; } - write_sequnlock(&net->cells_lock); - kfree(candidate); + up_write(&net->cells_lock); + if (candidate) + afs_put_cell(candidate, afs_cell_trace_put_candidate); if (ret == 0) goto wait_for_cell; goto error_noput; error: - afs_put_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_lookup); error_noput: _leave(" = %d [error]", ret); return ERR_PTR(ret); @@ -362,15 +374,16 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) } if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags)) - afs_get_cell(new_root); + afs_use_cell(new_root, afs_cell_trace_use_pin); /* install the new cell */ - write_seqlock(&net->cells_lock); - old_root = rcu_access_pointer(net->ws_cell); - rcu_assign_pointer(net->ws_cell, new_root); - write_sequnlock(&net->cells_lock); + down_write(&net->cells_lock); + afs_see_cell(new_root, afs_cell_trace_see_ws); + old_root = net->ws_cell; + net->ws_cell = new_root; + up_write(&net->cells_lock); - afs_put_cell(net, old_root); + afs_unuse_cell(net, old_root, afs_cell_trace_unuse_ws); _leave(" = 0"); return 0; } @@ -476,15 +489,22 @@ out_wake: static void afs_cell_destroy(struct rcu_head *rcu) { struct afs_cell *cell = container_of(rcu, struct afs_cell, rcu); + struct afs_net *net = cell->net; + int r; _enter("%p{%s}", cell, cell->name); - ASSERTCMP(atomic_read(&cell->usage), ==, 0); + r = refcount_read(&cell->ref); + ASSERTCMP(r, ==, 0); + trace_afs_cell(cell->debug_id, r, atomic_read(&cell->active), afs_cell_trace_free); - afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers)); + afs_put_vlserverlist(net, rcu_access_pointer(cell->vl_servers)); + afs_unuse_cell(net, cell->alias_of, afs_cell_trace_unuse_alias); key_put(cell->anonymous_key); + kfree(cell->name); kfree(cell); + afs_dec_cells_outstanding(net); _leave(" [destroyed]"); } @@ -517,18 +537,60 @@ void afs_cells_timer(struct timer_list *timer) /* * Get a reference on a cell record. */ -struct afs_cell *afs_get_cell(struct afs_cell *cell) +struct afs_cell *afs_get_cell(struct afs_cell *cell, enum afs_cell_trace reason) { - atomic_inc(&cell->usage); + int r; + + __refcount_inc(&cell->ref, &r); + trace_afs_cell(cell->debug_id, r + 1, atomic_read(&cell->active), reason); return cell; } /* * Drop a reference on a cell record. */ -void afs_put_cell(struct afs_net *net, struct afs_cell *cell) +void afs_put_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + if (cell) { + unsigned int debug_id = cell->debug_id; + unsigned int a; + bool zero; + int r; + + a = atomic_read(&cell->active); + zero = __refcount_dec_and_test(&cell->ref, &r); + trace_afs_cell(debug_id, r - 1, a, reason); + if (zero) { + a = atomic_read(&cell->active); + WARN(a != 0, "Cell active count %u > 0\n", a); + call_rcu(&cell->rcu, afs_cell_destroy); + } + } +} + +/* + * Note a cell becoming more active. + */ +struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason) { + int r, a; + + r = refcount_read(&cell->ref); + WARN_ON(r == 0); + a = atomic_inc_return(&cell->active); + trace_afs_cell(cell->debug_id, r, a, reason); + return cell; +} + +/* + * Record a cell becoming less active. When the active counter reaches 1, it + * is scheduled for destruction, but may get reactivated. + */ +void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason) +{ + unsigned int debug_id; time64_t now, expire_delay; + int r, a; if (!cell) return; @@ -541,11 +603,36 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell) if (cell->vl_servers->nr_servers) expire_delay = afs_cell_gc_delay; - if (atomic_dec_return(&cell->usage) > 1) - return; + debug_id = cell->debug_id; + r = refcount_read(&cell->ref); + a = atomic_dec_return(&cell->active); + trace_afs_cell(debug_id, r, a, reason); + WARN_ON(a == 0); + if (a == 1) + /* 'cell' may now be garbage collected. */ + afs_set_cell_timer(net, expire_delay); +} + +/* + * Note that a cell has been seen. + */ +void afs_see_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + int r, a; - /* 'cell' may now be garbage collected. */ - afs_set_cell_timer(net, expire_delay); + r = refcount_read(&cell->ref); + a = atomic_read(&cell->active); + trace_afs_cell(cell->debug_id, r, a, reason); +} + +/* + * Queue a cell for management, giving the workqueue a ref to hold. + */ +void afs_queue_cell(struct afs_cell *cell, enum afs_cell_trace reason) +{ + afs_get_cell(cell, reason); + if (!queue_work(afs_wq, &cell->manager)) + afs_put_cell(cell, afs_cell_trace_put_queue_fail); } /* @@ -590,13 +677,6 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell) return ret; } -#ifdef CONFIG_AFS_FSCACHE - cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, - &afs_cell_cache_index_def, - cell->name, strlen(cell->name), - NULL, 0, - cell, 0, true); -#endif ret = afs_proc_cell_setup(cell); if (ret < 0) return ret; @@ -633,11 +713,6 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) afs_dynroot_rmdir(net, cell); mutex_unlock(&net->proc_cells_lock); -#ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(cell->cache, NULL, false); - cell->cache = NULL; -#endif - _leave(""); } @@ -645,12 +720,10 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell) * Manage a cell record, initialising and destroying it, maintaining its DNS * records. */ -static void afs_manage_cell(struct work_struct *work) +static void afs_manage_cell(struct afs_cell *cell) { - struct afs_cell *cell = container_of(work, struct afs_cell, manager); struct afs_net *net = cell->net; - bool deleted; - int ret, usage; + int ret, active; _enter("%s", cell->name); @@ -659,14 +732,19 @@ again: switch (cell->state) { case AFS_CELL_INACTIVE: case AFS_CELL_FAILED: - write_seqlock(&net->cells_lock); - usage = 1; - deleted = atomic_try_cmpxchg_relaxed(&cell->usage, &usage, 0); - if (deleted) + down_write(&net->cells_lock); + active = 1; + if (atomic_try_cmpxchg_relaxed(&cell->active, &active, 0)) { rb_erase(&cell->net_node, &net->cells); - write_sequnlock(&net->cells_lock); - if (deleted) + trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), 0, + afs_cell_trace_unuse_delete); + smp_store_release(&cell->state, AFS_CELL_REMOVED); + } + up_write(&net->cells_lock); + if (cell->state == AFS_CELL_REMOVED) { + wake_up_var(&cell->state); goto final_destruction; + } if (cell->state == AFS_CELL_FAILED) goto done; smp_store_release(&cell->state, AFS_CELL_UNSET); @@ -688,7 +766,7 @@ again: goto again; case AFS_CELL_ACTIVE: - if (atomic_read(&cell->usage) > 1) { + if (atomic_read(&cell->active) > 1) { if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) { ret = afs_update_cell(cell); if (ret < 0) @@ -701,13 +779,16 @@ again: goto again; case AFS_CELL_DEACTIVATING: - if (atomic_read(&cell->usage) > 1) + if (atomic_read(&cell->active) > 1) goto reverse_deactivation; afs_deactivate_cell(net, cell); smp_store_release(&cell->state, AFS_CELL_INACTIVE); wake_up_var(&cell->state); goto again; + case AFS_CELL_REMOVED: + goto done; + default: break; } @@ -733,9 +814,18 @@ done: return; final_destruction: - call_rcu(&cell->rcu, afs_cell_destroy); - afs_dec_cells_outstanding(net); - _leave(" [destruct %d]", atomic_read(&net->cells_outstanding)); + /* The root volume is pinning the cell */ + afs_put_volume(cell->net, cell->root_volume, afs_volume_trace_put_cell_root); + cell->root_volume = NULL; + afs_put_cell(cell, afs_cell_trace_put_destroy); +} + +static void afs_manage_cell_work(struct work_struct *work) +{ + struct afs_cell *cell = container_of(work, struct afs_cell, manager); + + afs_manage_cell(cell); + afs_put_cell(cell, afs_cell_trace_put_queue_work); } /* @@ -764,26 +854,29 @@ void afs_manage_cells(struct work_struct *work) * lack of use and cells whose DNS results have expired and dispatch * their managers. */ - read_seqlock_excl(&net->cells_lock); + down_read(&net->cells_lock); for (cursor = rb_first(&net->cells); cursor; cursor = rb_next(cursor)) { struct afs_cell *cell = rb_entry(cursor, struct afs_cell, net_node); - unsigned usage; + unsigned active; bool sched_cell = false; - usage = atomic_read(&cell->usage); - _debug("manage %s %u", cell->name, usage); + active = atomic_read(&cell->active); + trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), + active, afs_cell_trace_manage); - ASSERTCMP(usage, >=, 1); + ASSERTCMP(active, >=, 1); if (purging) { - if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - usage = atomic_dec_return(&cell->usage); - ASSERTCMP(usage, ==, 1); + if (test_and_clear_bit(AFS_CELL_FL_NO_GC, &cell->flags)) { + active = atomic_dec_return(&cell->active); + trace_afs_cell(cell->debug_id, refcount_read(&cell->ref), + active, afs_cell_trace_unuse_pin); + } } - if (usage == 1) { + if (active == 1) { struct afs_vlserver_list *vllist; time64_t expire_at = cell->last_inactive; @@ -806,10 +899,10 @@ void afs_manage_cells(struct work_struct *work) } if (sched_cell) - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell, afs_cell_trace_get_queue_manage); } - read_sequnlock_excl(&net->cells_lock); + up_read(&net->cells_lock); /* Update the timer on the way out. We have to pass an increment on * cells_outstanding in the namespace that we are in to the timer or @@ -839,11 +932,11 @@ void afs_cell_purge(struct afs_net *net) _enter(""); - write_seqlock(&net->cells_lock); - ws = rcu_access_pointer(net->ws_cell); - RCU_INIT_POINTER(net->ws_cell, NULL); - write_sequnlock(&net->cells_lock); - afs_put_cell(net, ws); + down_write(&net->cells_lock); + ws = net->ws_cell; + net->ws_cell = NULL; + up_write(&net->cells_lock); + afs_unuse_cell(net, ws, afs_cell_trace_unuse_ws); _debug("del timer"); if (del_timer_sync(&net->cells_timer)) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ff3994a6be23..0a090d614e76 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -29,16 +29,11 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *); static int afs_deliver_yfs_cb_callback(struct afs_call *); -#define CM_NAME(name) \ - char afs_SRXCB##name##_name[] __tracepoint_string = \ - "CB." #name - /* * CB.CallBack operation type */ -static CM_NAME(CallBack); static const struct afs_call_type afs_SRXCBCallBack = { - .name = afs_SRXCBCallBack_name, + .name = "CB.CallBack", .deliver = afs_deliver_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, @@ -47,9 +42,8 @@ static const struct afs_call_type afs_SRXCBCallBack = { /* * CB.InitCallBackState operation type */ -static CM_NAME(InitCallBackState); static const struct afs_call_type afs_SRXCBInitCallBackState = { - .name = afs_SRXCBInitCallBackState_name, + .name = "CB.InitCallBackState", .deliver = afs_deliver_cb_init_call_back_state, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -58,9 +52,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = { /* * CB.InitCallBackState3 operation type */ -static CM_NAME(InitCallBackState3); static const struct afs_call_type afs_SRXCBInitCallBackState3 = { - .name = afs_SRXCBInitCallBackState3_name, + .name = "CB.InitCallBackState3", .deliver = afs_deliver_cb_init_call_back_state3, .destructor = afs_cm_destructor, .work = SRXAFSCB_InitCallBackState, @@ -69,9 +62,8 @@ static const struct afs_call_type afs_SRXCBInitCallBackState3 = { /* * CB.Probe operation type */ -static CM_NAME(Probe); static const struct afs_call_type afs_SRXCBProbe = { - .name = afs_SRXCBProbe_name, + .name = "CB.Probe", .deliver = afs_deliver_cb_probe, .destructor = afs_cm_destructor, .work = SRXAFSCB_Probe, @@ -80,9 +72,8 @@ static const struct afs_call_type afs_SRXCBProbe = { /* * CB.ProbeUuid operation type */ -static CM_NAME(ProbeUuid); static const struct afs_call_type afs_SRXCBProbeUuid = { - .name = afs_SRXCBProbeUuid_name, + .name = "CB.ProbeUuid", .deliver = afs_deliver_cb_probe_uuid, .destructor = afs_cm_destructor, .work = SRXAFSCB_ProbeUuid, @@ -91,9 +82,8 @@ static const struct afs_call_type afs_SRXCBProbeUuid = { /* * CB.TellMeAboutYourself operation type */ -static CM_NAME(TellMeAboutYourself); static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { - .name = afs_SRXCBTellMeAboutYourself_name, + .name = "CB.TellMeAboutYourself", .deliver = afs_deliver_cb_tell_me_about_yourself, .destructor = afs_cm_destructor, .work = SRXAFSCB_TellMeAboutYourself, @@ -102,9 +92,8 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { /* * YFS CB.CallBack operation type */ -static CM_NAME(YFS_CallBack); static const struct afs_call_type afs_SRXYFSCB_CallBack = { - .name = afs_SRXCBYFS_CallBack_name, + .name = "YFSCB.CallBack", .deliver = afs_deliver_yfs_cb_callback, .destructor = afs_cm_destructor, .work = SRXAFSCB_CallBack, @@ -118,8 +107,6 @@ bool afs_cm_incoming_call(struct afs_call *call) { _enter("{%u, CB.OP %u}", call->service_id, call->operation_ID); - call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall); - switch (call->operation_ID) { case CBCallBack: call->type = &afs_SRXCBCallBack; @@ -150,49 +137,6 @@ bool afs_cm_incoming_call(struct afs_call *call) } /* - * Record a probe to the cache manager from a server. - */ -static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server) -{ - _enter(""); - - if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) && - !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) { - if (server->cm_epoch == call->epoch) - return 0; - - if (!server->probe.said_rebooted) { - pr_notice("kAFS: FS rebooted %pU\n", &server->uuid); - server->probe.said_rebooted = true; - } - } - - spin_lock(&server->probe_lock); - - if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) { - server->cm_epoch = call->epoch; - server->probe.cm_epoch = call->epoch; - goto out; - } - - if (server->probe.cm_probed && - call->epoch != server->probe.cm_epoch && - !server->probe.said_inconsistent) { - pr_notice("kAFS: FS endpoints inconsistent %pU\n", - &server->uuid); - server->probe.said_inconsistent = true; - } - - if (!server->probe.cm_probed || call->epoch == server->cm_epoch) - server->probe.cm_epoch = server->cm_epoch; - -out: - server->probe.cm_probed = true; - spin_unlock(&server->probe_lock); - return 0; -} - -/* * Find the server record by peer address and record a probe to the cache * manager from a server. */ @@ -210,7 +154,7 @@ static int afs_find_cm_server_by_peer(struct afs_call *call) } call->server = server; - return afs_record_cm_probe(call, server); + return 0; } /* @@ -231,7 +175,7 @@ static int afs_find_cm_server_by_uuid(struct afs_call *call, } call->server = server; - return afs_record_cm_probe(call, server); + return 0; } /* @@ -244,6 +188,17 @@ static void afs_cm_destructor(struct afs_call *call) } /* + * Abort a service call from within an action function. + */ +static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, + const char *why) +{ + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, error, why); + afs_set_call_complete(call, error, 0); +} + +/* * The server supplied a list of callbacks that it wanted to break. */ static void SRXAFSCB_CallBack(struct work_struct *work) @@ -257,7 +212,9 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * to maintain cache coherency. */ if (call->server) { - trace_afs_server(call->server, atomic_read(&call->server->usage), + trace_afs_server(call->server->debug_id, + refcount_read(&call->server->ref), + atomic_read(&call->server->active), afs_server_trace_callback); afs_break_callbacks(call->server, call->count, call->request); } @@ -284,7 +241,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->unmarshall++; /* extract the FID array and its count in two steps */ - /* fall through */ + fallthrough; case 1: _debug("extract FID count"); ret = afs_extract_data(call, true); @@ -294,8 +251,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > AFSCBMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_fid_count); + return afs_protocol_error(call, afs_eproto_cb_fid_count); call->buffer = kmalloc(array3_size(call->count, 3, 4), GFP_KERNEL); @@ -304,7 +260,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) afs_extract_to_buf(call, call->count * 3 * 4); call->unmarshall++; - /* Fall through */ + fallthrough; case 2: _debug("extract FID array"); ret = afs_extract_data(call, true); @@ -330,7 +286,7 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->unmarshall++; /* extract the callback array and its count in two steps */ - /* fall through */ + fallthrough; case 3: _debug("extract CB count"); ret = afs_extract_data(call, true); @@ -340,13 +296,12 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->count2 = ntohl(call->tmp); _debug("CB count: %u", call->count2); if (call->count2 != call->count && call->count2 != 0) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_count); + return afs_protocol_error(call, afs_eproto_cb_count); call->iter = &call->def_iter; iov_iter_discard(&call->def_iter, READ, call->count2 * 3 * 4); call->unmarshall++; - /* Fall through */ + fallthrough; case 4: _debug("extract discard %zu/%u", iov_iter_count(call->iter), call->count2 * 3 * 4); @@ -356,6 +311,8 @@ static int afs_deliver_cb_callback(struct afs_call *call) return ret; call->unmarshall++; + fallthrough; + case 5: break; } @@ -425,7 +382,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; - /* Fall through */ + fallthrough; case 1: _debug("extract UUID"); ret = afs_extract_data(call, false); @@ -452,6 +409,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) r->node[loop] = ntohl(b[loop + 5]); call->unmarshall++; + fallthrough; case 2: break; @@ -498,7 +456,8 @@ static int afs_deliver_cb_probe(struct afs_call *call) } /* - * allow the fileserver to quickly find out if the fileserver has been rebooted + * Allow the fileserver to quickly find out if the cache manager has been + * rebooted. */ static void SRXAFSCB_ProbeUuid(struct work_struct *work) { @@ -510,8 +469,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, "K-1"); afs_put_call(call); _leave(""); @@ -537,7 +495,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) afs_extract_to_buf(call, 11 * sizeof(__be32)); call->unmarshall++; - /* Fall through */ + fallthrough; case 1: _debug("extract UUID"); ret = afs_extract_data(call, false); @@ -564,6 +522,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) r->node[loop] = ntohl(b[loop + 5]); call->unmarshall++; + fallthrough; case 2: break; @@ -571,7 +530,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING)) return afs_io_error(call, afs_io_error_cm_reply); - return afs_find_cm_server_by_uuid(call, call->request); + return afs_find_cm_server_by_peer(call); } /* @@ -652,7 +611,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) call->unmarshall++; /* extract the FID array and its count in two steps */ - /* Fall through */ + fallthrough; case 1: _debug("extract FID count"); ret = afs_extract_data(call, true); @@ -662,8 +621,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); if (call->count > YFSCBMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_cb_fid_count); + return afs_protocol_error(call, afs_eproto_cb_fid_count); size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid)); call->buffer = kmalloc(size, GFP_KERNEL); @@ -672,7 +630,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; case 2: _debug("extract FID array"); ret = afs_extract_data(call, false); @@ -698,6 +656,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; + fallthrough; case 3: break; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5c794f4b051a..230c2d19116d 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -24,27 +24,29 @@ static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_iput(struct dentry *dentry, struct inode *inode); -static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, +static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, +static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); -static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl); -static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +static int afs_create(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl); +static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode); static int afs_rmdir(struct inode *dir, struct dentry *dentry); static int afs_unlink(struct inode *dir, struct dentry *dentry); static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry); -static int afs_symlink(struct inode *dir, struct dentry *dentry, - const char *content); -static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags); -static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags); -static void afs_dir_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); - -static int afs_dir_set_page_dirty(struct page *page) +static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, const char *content); +static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags); +static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags); +static void afs_dir_invalidate_folio(struct folio *folio, size_t offset, + size_t length); + +static bool afs_dir_dirty_folio(struct address_space *mapping, + struct folio *folio) { BUG(); /* This should never happen. */ } @@ -69,13 +71,12 @@ const struct inode_operations afs_dir_inode_operations = { .permission = afs_permission, .getattr = afs_getattr, .setattr = afs_setattr, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_dir_aops = { - .set_page_dirty = afs_dir_set_page_dirty, - .releasepage = afs_dir_releasepage, - .invalidatepage = afs_dir_invalidatepage, + .dirty_folio = afs_dir_dirty_folio, + .release_folio = afs_dir_release_folio, + .invalidate_folio = afs_dir_invalidate_folio, }; const struct dentry_operations afs_fs_dentry_operations = { @@ -99,57 +100,75 @@ struct afs_lookup_cookie { bool found; bool one_only; unsigned short nr_fids; - struct inode **inodes; - struct afs_status_cb *statuses; struct afs_fid fids[50]; }; /* - * check that a directory page is valid + * Drop the refs that we're holding on the folios we were reading into. We've + * got refs on the first nr_pages pages. */ -static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page, - loff_t i_size) +static void afs_dir_read_cleanup(struct afs_read *req) { - struct afs_xdr_dir_page *dbuf; - loff_t latter, off; - int tmp, qty; + struct address_space *mapping = req->vnode->netfs.inode.i_mapping; + struct folio *folio; + pgoff_t last = req->nr_pages - 1; - /* Determine how many magic numbers there should be in this page, but + XA_STATE(xas, &mapping->i_pages, 0); + + if (unlikely(!req->nr_pages)) + return; + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + if (xas_retry(&xas, folio)) + continue; + BUG_ON(xa_is_value(folio)); + ASSERTCMP(folio_file_mapping(folio), ==, mapping); + + folio_put(folio); + } + + rcu_read_unlock(); +} + +/* + * check that a directory folio is valid + */ +static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio, + loff_t i_size) +{ + union afs_xdr_dir_block *block; + size_t offset, size; + loff_t pos; + + /* Determine how many magic numbers there should be in this folio, but * we must take care because the directory may change size under us. */ - off = page_offset(page); - if (i_size <= off) + pos = folio_pos(folio); + if (i_size <= pos) goto checked; - latter = i_size - off; - if (latter >= PAGE_SIZE) - qty = PAGE_SIZE; - else - qty = latter; - qty /= sizeof(union afs_xdr_dir_block); - - /* check them */ - dbuf = kmap(page); - for (tmp = 0; tmp < qty; tmp++) { - if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) { - printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n", - __func__, dvnode->vfs_inode.i_ino, tmp, qty, - ntohs(dbuf->blocks[tmp].hdr.magic)); - trace_afs_dir_check_failed(dvnode, off, i_size); - kunmap(page); + size = min_t(loff_t, folio_size(folio), i_size - pos); + for (offset = 0; offset < size; offset += sizeof(*block)) { + block = kmap_local_folio(folio, offset); + if (block->hdr.magic != AFS_DIR_MAGIC) { + printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n", + __func__, dvnode->netfs.inode.i_ino, + pos, offset, size, ntohs(block->hdr.magic)); + trace_afs_dir_check_failed(dvnode, pos + offset, i_size); + kunmap_local(block); trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic); goto error; } /* Make sure each block is NUL terminated so we can reasonably - * use string functions on it. The filenames in the page + * use string functions on it. The filenames in the folio * *should* be NUL-terminated anyway. */ - ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0; - } - - kunmap(page); + ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0; + kunmap_local(block); + } checked: afs_stat_v(dvnode, n_read_dir); return true; @@ -159,35 +178,71 @@ error: } /* - * Check the contents of a directory that we've just read. + * Dump the contents of a directory. */ -static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req) +static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req) { - struct afs_xdr_dir_page *dbuf; - unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block); + union afs_xdr_dir_block *block; + struct address_space *mapping = dvnode->netfs.inode.i_mapping; + struct folio *folio; + pgoff_t last = req->nr_pages - 1; + size_t offset, size; - for (i = 0; i < req->nr_pages; i++) - if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) - goto bad; - return true; + XA_STATE(xas, &mapping->i_pages, 0); -bad: - pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n", + pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n", dvnode->fid.vid, dvnode->fid.vnode, - req->file_size, req->len, req->actual_len, req->remain); - pr_warn("DIR %llx %x %x %x\n", - req->pos, req->index, req->nr_pages, req->offset); + req->file_size, req->len, req->actual_len); + pr_warn("DIR %llx %x %zx %zx\n", + req->pos, req->nr_pages, + req->iter->iov_offset, iov_iter_count(req->iter)); - for (i = 0; i < req->nr_pages; i++) { - dbuf = kmap(req->pages[i]); - for (j = 0; j < qty; j++) { - union afs_xdr_dir_block *block = &dbuf->blocks[j]; + xas_for_each(&xas, folio, last) { + if (xas_retry(&xas, folio)) + continue; + + BUG_ON(folio_file_mapping(folio) != mapping); - pr_warn("[%02x] %32phN\n", i * qty + j, block); + size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio)); + for (offset = 0; offset < size; offset += sizeof(*block)) { + block = kmap_local_folio(folio, offset); + pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block); + kunmap_local(block); } - kunmap(req->pages[i]); } - return false; +} + +/* + * Check all the blocks in a directory. All the folios are held pinned. + */ +static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req) +{ + struct address_space *mapping = dvnode->netfs.inode.i_mapping; + struct folio *folio; + pgoff_t last = req->nr_pages - 1; + int ret = 0; + + XA_STATE(xas, &mapping->i_pages, 0); + + if (unlikely(!req->nr_pages)) + return 0; + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + if (xas_retry(&xas, folio)) + continue; + + BUG_ON(folio_file_mapping(folio) != mapping); + + if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) { + afs_dir_dump(dvnode, req); + ret = -EIO; + break; + } + } + + rcu_read_unlock(); + return ret; } /* @@ -208,89 +263,78 @@ static int afs_dir_open(struct inode *inode, struct file *file) /* * Read the directory into the pagecache in one go, scrubbing the previous - * contents. The list of pages is returned, pinning them so that they don't + * contents. The list of folios is returned, pinning them so that they don't * get reclaimed during the iteration. */ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) __acquires(&dvnode->validate_lock) { + struct address_space *mapping = dvnode->netfs.inode.i_mapping; struct afs_read *req; loff_t i_size; - int nr_pages, nr_inline, i, n; - int ret = -ENOMEM; + int nr_pages, i; + int ret; + + _enter(""); -retry: - i_size = i_size_read(&dvnode->vfs_inode); - if (i_size < 2048) - return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small)); + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return ERR_PTR(-ENOMEM); + + refcount_set(&req->usage, 1); + req->vnode = dvnode; + req->key = key_get(key); + req->cleanup = afs_dir_read_cleanup; + +expand: + i_size = i_size_read(&dvnode->netfs.inode); + if (i_size < 2048) { + ret = afs_bad(dvnode, afs_file_error_dir_small); + goto error; + } if (i_size > 2048 * 1024) { trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big); - return ERR_PTR(-EFBIG); + ret = -EFBIG; + goto error; } _enter("%llu", i_size); - /* Get a request record to hold the page list. We want to hold it - * inline if we can, but we don't want to make an order 1 allocation. - */ nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE; - nr_inline = nr_pages; - if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *)) - nr_inline = 0; - - req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - refcount_set(&req->usage, 1); - req->nr_pages = nr_pages; req->actual_len = i_size; /* May change */ req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */ req->data_version = dvnode->status.data_version; /* May change */ - if (nr_inline > 0) { - req->pages = req->array; - } else { - req->pages = kcalloc(nr_pages, sizeof(struct page *), - GFP_KERNEL); - if (!req->pages) - goto error; - } + iov_iter_xarray(&req->def_iter, READ, &dvnode->netfs.inode.i_mapping->i_pages, + 0, i_size); + req->iter = &req->def_iter; - /* Get a list of all the pages that hold or will hold the directory - * content. We need to fill in any gaps that we might find where the - * memory reclaimer has been at work. If there are any gaps, we will + /* Fill in any gaps that we might find where the memory reclaimer has + * been at work and pin all the folios. If there are any gaps, we will * need to reread the entire directory contents. */ - i = 0; - do { - n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i, - req->nr_pages - i, - req->pages + i); - _debug("find %u at %u/%u", n, i, req->nr_pages); - if (n == 0) { - gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask; + i = req->nr_pages; + while (i < nr_pages) { + struct folio *folio; + folio = filemap_get_folio(mapping, i); + if (!folio) { if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_stat_v(dvnode, n_inval); ret = -ENOMEM; - req->pages[i] = __page_cache_alloc(gfp); - if (!req->pages[i]) - goto error; - ret = add_to_page_cache_lru(req->pages[i], - dvnode->vfs_inode.i_mapping, - i, gfp); - if (ret < 0) + folio = __filemap_get_folio(mapping, + i, FGP_LOCK | FGP_CREAT, + mapping->gfp_mask); + if (!folio) goto error; - - set_page_private(req->pages[i], 1); - SetPagePrivate(req->pages[i]); - unlock_page(req->pages[i]); - i++; - } else { - i += n; + folio_attach_private(folio, (void *)1); + folio_unlock(folio); } - } while (i < req->nr_pages); + + req->nr_pages += folio_nr_pages(folio); + i += folio_nr_pages(folio); + } /* If we're going to reload, we need to lock all the pages to prevent * races. @@ -308,18 +352,23 @@ retry: if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { trace_afs_reload_dir(dvnode); - ret = afs_fetch_data(dvnode, key, req); + ret = afs_fetch_data(dvnode, req); if (ret < 0) goto error_unlock; task_io_account_read(PAGE_SIZE * req->nr_pages); - if (req->len < req->file_size) - goto content_has_grown; + if (req->len < req->file_size) { + /* The content has grown, so we need to expand the + * buffer. + */ + up_write(&dvnode->validate_lock); + goto expand; + } /* Validate the data we just read. */ - ret = -EIO; - if (!afs_dir_check_pages(dvnode, req)) + ret = afs_dir_check(dvnode, req); + if (ret < 0) goto error_unlock; // TODO: Trim excess pages @@ -337,11 +386,6 @@ error: afs_put_read(req); _leave(" = %d", ret); return ERR_PTR(ret); - -content_has_grown: - up_write(&dvnode->validate_lock); - afs_put_read(req); - goto retry; } /* @@ -353,11 +397,11 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, unsigned blkoff) { union afs_xdr_dirent *dire; - unsigned offset, next, curr; + unsigned offset, next, curr, nr_slots; size_t nlen; int tmp; - _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); + _enter("%llx,%x", ctx->pos, blkoff); curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent); @@ -366,13 +410,12 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, offset < AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { - next = offset + 1; - /* skip entries marked unused in the bitmap */ if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", blkoff / sizeof(union afs_xdr_dir_block), offset); + next = offset + 1; if (offset >= curr) ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); @@ -384,40 +427,47 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, nlen = strnlen(dire->u.name, sizeof(*block) - offset * sizeof(union afs_xdr_dirent)); + if (nlen > AFSNAMEMAX - 1) { + _debug("ENT[%zu]: name too long (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, nlen); + return afs_bad(dvnode, afs_file_error_dir_name_too_long); + } _debug("ENT[%zu.%u]: %s %zu \"%s\"", blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); - /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { - if (next >= AFS_DIR_SLOTS_PER_BLOCK) { - _debug("ENT[%zu.%u]:" - " %u travelled beyond end dir block" - " (len %u/%zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); - return afs_bad(dvnode, afs_file_error_dir_over_end); - } - if (!(block->hdr.bitmap[next / 8] & - (1 << (next % 8)))) { - _debug("ENT[%zu.%u]:" - " %u unmarked extension (len %u/%zu)", + nr_slots = afs_dir_calc_slots(nlen); + next = offset + nr_slots; + if (next > AFS_DIR_SLOTS_PER_BLOCK) { + _debug("ENT[%zu.%u]:" + " %u extends beyond end dir block" + " (len %zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, nlen); + return afs_bad(dvnode, afs_file_error_dir_over_end); + } + + /* Check that the name-extension dirents are all allocated */ + for (tmp = 1; tmp < nr_slots; tmp++) { + unsigned int ix = offset + tmp; + if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) { + _debug("ENT[%zu.u]:" + " %u unmarked extension (%u/%u)", blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); + offset, tmp, nr_slots); return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } - - _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_xdr_dir_block), - next, tmp, nlen); - next++; } /* skip if starts before the current position */ - if (offset < curr) + if (offset < curr) { + if (next > curr) + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, @@ -443,11 +493,10 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, struct key *key, afs_dataversion_t *_dir_version) { struct afs_vnode *dvnode = AFS_FS_I(dir); - struct afs_xdr_dir_page *dbuf; union afs_xdr_dir_block *dblock; struct afs_read *req; - struct page *page; - unsigned blkoff, limit; + struct folio *folio; + unsigned offset, size; int ret; _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos); @@ -469,37 +518,30 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, /* walk through the blocks in sequence */ ret = 0; while (ctx->pos < req->actual_len) { - blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1); - - /* Fetch the appropriate page from the directory and re-add it - * to the LRU. + /* Fetch the appropriate folio from the directory and re-add it + * to the LRU. We have all the pages pinned with an extra ref. */ - page = req->pages[blkoff / PAGE_SIZE]; - if (!page) { + folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE, + FGP_ACCESSED, 0); + if (!folio) { ret = afs_bad(dvnode, afs_file_error_dir_missing_page); break; } - mark_page_accessed(page); - limit = blkoff & ~(PAGE_SIZE - 1); + offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio); + size = min_t(loff_t, folio_size(folio), + req->actual_len - folio_file_pos(folio)); - dbuf = kmap(page); - - /* deal with the individual blocks stashed on this page */ do { - dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / - sizeof(union afs_xdr_dir_block)]; - ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff); - if (ret != 1) { - kunmap(page); + dblock = kmap_local_folio(folio, offset); + ret = afs_dir_iterate_block(dvnode, ctx, dblock, + folio_file_pos(folio) + offset); + kunmap_local(dblock); + if (ret != 1) goto out; - } - blkoff += sizeof(union afs_xdr_dir_block); + } while (offset += sizeof(*dblock), offset < size); - } while (ctx->pos < dir->i_size && blkoff < limit); - - kunmap(page); ret = 0; } @@ -526,7 +568,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, +static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype) { struct afs_lookup_one_cookie *cookie = @@ -542,16 +584,16 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, if (cookie->name.len != nlen || memcmp(cookie->name.name, name, nlen) != 0) { - _leave(" = 0 [no]"); - return 0; + _leave(" = true [keep looking]"); + return true; } cookie->fid.vnode = ino; cookie->fid.unique = dtype; cookie->found = 1; - _leave(" = -1 [found]"); - return -1; + _leave(" = false [found]"); + return false; } /* @@ -579,7 +621,6 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, return ret; } - ret = -ENOENT; if (!cookie.found) { _leave(" = -ENOENT [not found]"); return -ENOENT; @@ -595,12 +636,11 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_filldir(struct dir_context *ctx, const char *name, +static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype) { struct afs_lookup_cookie *cookie = container_of(ctx, struct afs_lookup_cookie, ctx); - int ret; _enter("{%s,%u},%s,%u,,%llu,%u", cookie->name.name, cookie->name.len, name, nlen, @@ -618,15 +658,119 @@ static int afs_lookup_filldir(struct dir_context *ctx, const char *name, } } else if (cookie->name.len == nlen && memcmp(cookie->name.name, name, nlen) == 0) { - cookie->fids[0].vnode = ino; - cookie->fids[0].unique = dtype; + cookie->fids[1].vnode = ino; + cookie->fids[1].unique = dtype; cookie->found = 1; if (cookie->one_only) - return -1; + return false; } - ret = cookie->nr_fids >= 50 ? -1 : 0; - _leave(" = %d", ret); + return cookie->nr_fids < 50; +} + +/* + * Deal with the result of a successful lookup operation. Turn all the files + * into inodes and save the first one - which is the one we actually want. + */ +static void afs_do_lookup_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp; + struct afs_vnode *vnode; + struct inode *inode; + u32 abort_code; + int i; + + _enter(""); + + for (i = 0; i < op->nr_files; i++) { + switch (i) { + case 0: + vp = &op->file[0]; + abort_code = vp->scb.status.abort_code; + if (abort_code != 0) { + op->ac.abort_code = abort_code; + op->error = afs_abort_to_error(abort_code); + } + break; + + case 1: + vp = &op->file[1]; + break; + + default: + vp = &op->more_files[i - 2]; + break; + } + + if (!vp->scb.have_status && !vp->scb.have_error) + continue; + + _debug("do [%u]", i); + if (vp->vnode) { + if (!test_bit(AFS_VNODE_UNSET, &vp->vnode->flags)) + afs_vnode_commit_status(op, vp); + } else if (vp->scb.status.abort_code == 0) { + inode = afs_iget(op, vp); + if (!IS_ERR(inode)) { + vnode = AFS_FS_I(inode); + afs_cache_permit(vnode, op->key, + 0 /* Assume vnode->cb_break is 0 */ + + op->cb_v_break, + &vp->scb); + vp->vnode = vnode; + vp->put_vnode = true; + } + } else { + _debug("- abort %d %llx:%llx.%x", + vp->scb.status.abort_code, + vp->fid.vid, vp->fid.vnode, vp->fid.unique); + } + } + + _leave(""); +} + +static const struct afs_operation_ops afs_inline_bulk_status_operation = { + .issue_afs_rpc = afs_fs_inline_bulk_status, + .issue_yfs_rpc = yfs_fs_inline_bulk_status, + .success = afs_do_lookup_success, +}; + +static const struct afs_operation_ops afs_lookup_fetch_status_operation = { + .issue_afs_rpc = afs_fs_fetch_status, + .issue_yfs_rpc = yfs_fs_fetch_status, + .success = afs_do_lookup_success, + .aborted = afs_check_for_remote_deletion, +}; + +/* + * See if we know that the server we expect to use doesn't support + * FS.InlineBulkStatus. + */ +static bool afs_server_supports_ibulk(struct afs_vnode *dvnode) +{ + struct afs_server_list *slist; + struct afs_volume *volume = dvnode->volume; + struct afs_server *server; + bool ret = true; + int i; + + if (!test_bit(AFS_VOLUME_MAYBE_NO_IBULK, &volume->flags)) + return true; + + rcu_read_lock(); + slist = rcu_dereference(volume->servers); + + for (i = 0; i < slist->nr_servers; i++) { + server = slist->servers[i].server; + if (server == dvnode->cb_server) { + if (test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags)) + ret = false; + break; + } + } + + rcu_read_unlock(); return ret; } @@ -639,16 +783,13 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, struct key *key) { struct afs_lookup_cookie *cookie; - struct afs_cb_interest *dcbi, *cbi = NULL; - struct afs_super_info *as = dir->i_sb->s_fs_info; - struct afs_status_cb *scb; - struct afs_iget_data iget_data; - struct afs_fs_cursor fc; - struct afs_server *server; + struct afs_vnode_param *vp; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; struct inode *inode = NULL, *ti; afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); - int ret, i; + long ret; + int i; _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); @@ -656,72 +797,75 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, if (!cookie) return ERR_PTR(-ENOMEM); + for (i = 0; i < ARRAY_SIZE(cookie->fids); i++) + cookie->fids[i].vid = dvnode->fid.vid; cookie->ctx.actor = afs_lookup_filldir; cookie->name = dentry->d_name; - cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */ - - read_seqlock_excl(&dvnode->cb_lock); - dcbi = rcu_dereference_protected(dvnode->cb_interest, - lockdep_is_held(&dvnode->cb_lock.lock)); - if (dcbi) { - server = dcbi->server; - if (server && - test_bit(AFS_SERVER_FL_NO_IBULK, &server->flags)) - cookie->one_only = true; - } - read_sequnlock_excl(&dvnode->cb_lock); + cookie->nr_fids = 2; /* slot 0 is saved for the fid we actually want + * and slot 1 for the directory */ - for (i = 0; i < 50; i++) - cookie->fids[i].vid = as->volume->vid; + if (!afs_server_supports_ibulk(dvnode)) + cookie->one_only = true; /* search the directory */ ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version); - if (ret < 0) { - inode = ERR_PTR(ret); + if (ret < 0) goto out; - } dentry->d_fsdata = (void *)(unsigned long)data_version; - inode = ERR_PTR(-ENOENT); + ret = -ENOENT; if (!cookie->found) goto out; /* Check to see if we already have an inode for the primary fid. */ - iget_data.fid = cookie->fids[0]; - iget_data.volume = dvnode->volume; - iget_data.cb_v_break = dvnode->volume->cb_v_break; - iget_data.cb_s_break = 0; - inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, - afs_iget5_test, &iget_data); + inode = ilookup5(dir->i_sb, cookie->fids[1].vnode, + afs_ilookup5_test_by_fid, &cookie->fids[1]); if (inode) - goto out; + goto out; /* We do */ - /* Need space for examining all the selected files */ - inode = ERR_PTR(-ENOMEM); - cookie->statuses = kvcalloc(cookie->nr_fids, sizeof(struct afs_status_cb), - GFP_KERNEL); - if (!cookie->statuses) + /* Okay, we didn't find it. We need to query the server - and whilst + * we're doing that, we're going to attempt to look up a bunch of other + * vnodes also. + */ + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); goto out; + } - cookie->inodes = kcalloc(cookie->nr_fids, sizeof(struct inode *), - GFP_KERNEL); - if (!cookie->inodes) - goto out_s; + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_fid(op, 1, &cookie->fids[1]); - for (i = 1; i < cookie->nr_fids; i++) { - scb = &cookie->statuses[i]; + op->nr_files = cookie->nr_fids; + _debug("nr_files %u", op->nr_files); - /* Find any inodes that already exist and get their - * callback counters. - */ - iget_data.fid = cookie->fids[i]; - ti = ilookup5_nowait(dir->i_sb, iget_data.fid.vnode, - afs_iget5_test, &iget_data); - if (!IS_ERR_OR_NULL(ti)) { - vnode = AFS_FS_I(ti); - scb->cb_break = afs_calc_vnode_cb_break(vnode); - cookie->inodes[i] = ti; + /* Need space for examining all the selected files */ + op->error = -ENOMEM; + if (op->nr_files > 2) { + op->more_files = kvcalloc(op->nr_files - 2, + sizeof(struct afs_vnode_param), + GFP_KERNEL); + if (!op->more_files) + goto out_op; + + for (i = 2; i < op->nr_files; i++) { + vp = &op->more_files[i - 2]; + vp->fid = cookie->fids[i]; + + /* Find any inodes that already exist and get their + * callback counters. + */ + ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode, + afs_ilookup5_test_by_fid, &vp->fid); + if (!IS_ERR_OR_NULL(ti)) { + vnode = AFS_FS_I(ti); + vp->dv_before = vnode->status.data_version; + vp->cb_break_before = afs_calc_vnode_cb_break(vnode); + vp->vnode = vnode; + vp->put_vnode = true; + vp->speculative = true; /* vnode not locked */ + } } } @@ -729,120 +873,40 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry, * lookups contained therein are stored in the reply without aborting * the whole operation. */ - if (cookie->one_only) - goto no_inline_bulk_status; - - inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - while (afs_select_fileserver(&fc)) { - if (test_bit(AFS_SERVER_FL_NO_IBULK, - &fc.cbi->server->flags)) { - fc.ac.abort_code = RX_INVALID_OPERATION; - fc.ac.error = -ECONNABORTED; - break; - } - iget_data.cb_v_break = dvnode->volume->cb_v_break; - iget_data.cb_s_break = fc.cbi->server->cb_s_break; - afs_fs_inline_bulk_status(&fc, - afs_v2net(dvnode), - cookie->fids, - cookie->statuses, - cookie->nr_fids, NULL); - } - - if (fc.ac.error == 0) - cbi = afs_get_cb_interest(fc.cbi); - if (fc.ac.abort_code == RX_INVALID_OPERATION) - set_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags); - inode = ERR_PTR(afs_end_vnode_operation(&fc)); + op->error = -ENOTSUPP; + if (!cookie->one_only) { + op->ops = &afs_inline_bulk_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); } - if (!IS_ERR(inode)) - goto success; - if (fc.ac.abort_code != RX_INVALID_OPERATION) - goto out_c; - -no_inline_bulk_status: - /* We could try FS.BulkStatus next, but this aborts the entire op if - * any of the lookups fails - so, for the moment, revert to - * FS.FetchStatus for just the primary fid. - */ - inode = ERR_PTR(-ERESTARTSYS); - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - while (afs_select_fileserver(&fc)) { - iget_data.cb_v_break = dvnode->volume->cb_v_break; - iget_data.cb_s_break = fc.cbi->server->cb_s_break; - scb = &cookie->statuses[0]; - afs_fs_fetch_status(&fc, - afs_v2net(dvnode), - cookie->fids, - scb, - NULL); - } - - if (fc.ac.error == 0) - cbi = afs_get_cb_interest(fc.cbi); - inode = ERR_PTR(afs_end_vnode_operation(&fc)); + if (op->error == -ENOTSUPP) { + /* We could try FS.BulkStatus next, but this aborts the entire + * op if any of the lookups fails - so, for the moment, revert + * to FS.FetchStatus for op->file[1]. + */ + op->fetch_status.which = 1; + op->ops = &afs_lookup_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); } + inode = ERR_PTR(op->error); - if (IS_ERR(inode)) - goto out_c; - -success: - /* Turn all the files into inodes and save the first one - which is the - * one we actually want. - */ - scb = &cookie->statuses[0]; - if (scb->status.abort_code != 0) - inode = ERR_PTR(afs_abort_to_error(scb->status.abort_code)); - - for (i = 0; i < cookie->nr_fids; i++) { - struct afs_status_cb *scb = &cookie->statuses[i]; - - if (!scb->have_status && !scb->have_error) - continue; - - if (cookie->inodes[i]) { - struct afs_vnode *iv = AFS_FS_I(cookie->inodes[i]); - - if (test_bit(AFS_VNODE_UNSET, &iv->flags)) - continue; - - afs_vnode_commit_status(&fc, iv, - scb->cb_break, NULL, scb); - continue; - } - - if (scb->status.abort_code != 0) - continue; - - iget_data.fid = cookie->fids[i]; - ti = afs_iget(dir->i_sb, key, &iget_data, scb, cbi, dvnode); - if (!IS_ERR(ti)) - afs_cache_permit(AFS_FS_I(ti), key, - 0 /* Assume vnode->cb_break is 0 */ + - iget_data.cb_v_break, - scb); - if (i == 0) { - inode = ti; - } else { - if (!IS_ERR(ti)) - iput(ti); - } +out_op: + if (op->error == 0) { + inode = &op->file[1].vnode->netfs.inode; + op->file[1].vnode = NULL; } -out_c: - afs_put_cb_interest(afs_v2net(dvnode), cbi); - if (cookie->inodes) { - for (i = 0; i < cookie->nr_fids; i++) - iput(cookie->inodes[i]); - kfree(cookie->inodes); - } -out_s: - kvfree(cookie->statuses); + if (op->file[0].scb.have_status) + dentry->d_fsdata = (void *)(unsigned long)op->file[0].scb.status.data_version; + else + dentry->d_fsdata = (void *)(unsigned long)op->file[0].dv_before; + ret = afs_put_operation(op); out: kfree(cookie); - return inode; + _leave(""); + return inode ?: ERR_PTR(ret); } /* @@ -958,6 +1022,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, if (!IS_ERR_OR_NULL(inode)) fid = AFS_FS_I(inode)->fid; + _debug("splice %p", dentry->d_inode); d = d_splice_alias(inode, dentry); if (!IS_ERR_OR_NULL(d)) { d->d_fsdata = dentry->d_fsdata; @@ -965,6 +1030,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, } else { trace_afs_lookup(dvnode, &dentry->d_name, &fid); } + _leave(""); return d; } @@ -973,9 +1039,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, */ static int afs_d_revalidate_rcu(struct dentry *dentry) { - struct afs_vnode *dvnode, *vnode; + struct afs_vnode *dvnode; struct dentry *parent; - struct inode *dir, *inode; + struct inode *dir; long dir_version, de_version; _enter("%p", dentry); @@ -1005,18 +1071,6 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) return -ECHILD; } - /* Check to see if the vnode referred to by the dentry still - * has a callback. - */ - if (d_really_is_positive(dentry)) { - inode = d_inode_rcu(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - if (!afs_check_validity(vnode)) - return -ECHILD; - } - } - return 1; /* Still valid */ } @@ -1028,11 +1082,11 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct afs_vnode *vnode, *dir; - struct afs_fid uninitialized_var(fid); + struct afs_fid fid; struct dentry *parent; struct inode *inode; struct key *key; - afs_dataversion_t dir_version; + afs_dataversion_t dir_version, invalid_before; long de_version; int ret; @@ -1052,17 +1106,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (IS_ERR(key)) key = NULL; - if (d_really_is_positive(dentry)) { - inode = d_inode(dentry); - if (inode) { - vnode = AFS_FS_I(inode); - afs_validate(vnode, key); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto out_bad; - } - } - - /* lock down the parent dentry so we can peer at it */ + /* Hold the parent dentry so we can peer at it */ parent = dget_parent(dentry); dir = AFS_FS_I(d_inode(parent)); @@ -1071,7 +1115,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (test_bit(AFS_VNODE_DELETED, &dir->flags)) { _debug("%pd: parent dir deleted", dentry); - goto out_bad_parent; + goto not_found; } /* We only need to invalidate a dentry if the server's copy changed @@ -1084,25 +1128,25 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) if (de_version == (long)dir_version) goto out_valid_noupdate; - dir_version = dir->invalid_before; - if (de_version - (long)dir_version >= 0) + invalid_before = dir->invalid_before; + if (de_version - (long)invalid_before >= 0) goto out_valid; _debug("dir modified"); afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version); + ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version); switch (ret) { case 0: /* the filename maps to something */ if (d_really_is_negative(dentry)) - goto out_bad_parent; + goto not_found; inode = d_inode(dentry); if (is_bad_inode(inode)) { printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n", dentry); - goto out_bad_parent; + goto not_found; } vnode = AFS_FS_I(inode); @@ -1123,10 +1167,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) _debug("%pd: file deleted (uq %u -> %u I:%u)", dentry, fid.unique, vnode->fid.unique, - vnode->vfs_inode.i_generation); - write_seqlock(&vnode->cb_lock); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - write_sequnlock(&vnode->cb_lock); + vnode->netfs.inode.i_generation); goto not_found; } goto out_valid; @@ -1141,7 +1182,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) default: _debug("failed to iterate dir %pd: %d", parent, ret); - goto out_bad_parent; + goto not_found; } out_valid: @@ -1152,16 +1193,9 @@ out_valid_noupdate: _leave(" = 1 [valid]"); return 1; - /* the dirent, if it exists, now points to a different vnode */ not_found: - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_NFSFS_RENAMED; - spin_unlock(&dentry->d_lock); - -out_bad_parent: _debug("dropping dentry %pd2", dentry); dput(parent); -out_bad: key_put(key); _leave(" = 0 [bad]"); @@ -1212,128 +1246,115 @@ void afs_d_release(struct dentry *dentry) _enter("%pd", dentry); } +void afs_check_for_remote_deletion(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + switch (op->ac.abort_code) { + case VNOVNODE: + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode, afs_cb_break_for_deleted); + } +} + /* * Create a new inode for create/mkdir/symlink */ -static void afs_vnode_new_inode(struct afs_fs_cursor *fc, - struct dentry *new_dentry, - struct afs_iget_data *new_data, - struct afs_status_cb *new_scb) +static void afs_vnode_new_inode(struct afs_operation *op) { + struct afs_vnode_param *vp = &op->file[1]; struct afs_vnode *vnode; struct inode *inode; - if (fc->ac.error < 0) - return; + _enter(""); - inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key, - new_data, new_scb, fc->cbi, fc->vnode); + ASSERTCMP(op->error, ==, 0); + + inode = afs_iget(op, vp); if (IS_ERR(inode)) { /* ENOMEM or EINTR at a really inconvenient time - just abandon * the new directory on the server. */ - fc->ac.error = PTR_ERR(inode); + op->error = PTR_ERR(inode); return; } vnode = AFS_FS_I(inode); set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - if (fc->ac.error == 0) - afs_cache_permit(vnode, fc->key, vnode->cb_break, new_scb); - d_instantiate(new_dentry, inode); + if (!op->error) + afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb); + d_instantiate(op->dentry, inode); } -static void afs_prep_for_new_inode(struct afs_fs_cursor *fc, - struct afs_iget_data *iget_data) +static void afs_create_success(struct afs_operation *op) { - iget_data->volume = fc->vnode->volume; - iget_data->cb_v_break = fc->vnode->volume->cb_v_break; - iget_data->cb_s_break = fc->cbi->server->cb_s_break; + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + afs_update_dentry_version(op, &op->file[0], op->dentry); + afs_vnode_new_inode(op); } -/* - * Note that a dentry got changed. We need to set d_fsdata to the data version - * number derived from the result of the operation. It doesn't matter if - * d_fsdata goes backwards as we'll just revalidate. - */ -static void afs_update_dentry_version(struct afs_fs_cursor *fc, - struct dentry *dentry, - struct afs_status_cb *scb) +static void afs_create_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid, + op->create.reason); + up_write(&dvnode->validate_lock); +} + +static void afs_create_put(struct afs_operation *op) { - if (fc->ac.error == 0) - dentry->d_fsdata = - (void *)(unsigned long)scb->status.data_version; + _enter("op=%08x", op->debug_id); + + if (op->error) + d_drop(op->dentry); } +static const struct afs_operation_ops afs_mkdir_operation = { + .issue_afs_rpc = afs_fs_make_dir, + .issue_yfs_rpc = yfs_fs_make_dir, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + /* * create a directory on an AFS filesystem */ -static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { - struct afs_iget_data iget_data; - struct afs_status_cb *scb; - struct afs_fs_cursor fc; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct key *key; - int ret; - - mode |= S_IFDIR; _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - goto error; - - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_prep_for_new_inode(&fc, &iget_data); - afs_fs_create(&fc, dentry->d_name.name, mode, - &scb[0], &iget_data.fid, &scb[1]); - } - - afs_check_for_remote_deletion(&fc, dvnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, &scb[0]); - afs_update_dentry_version(&fc, dentry, &scb[0]); - afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); - ret = afs_end_vnode_operation(&fc); - if (ret < 0) - goto error_key; - } else { - goto error_key; + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + d_drop(dentry); + return PTR_ERR(op); } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_create); - - key_put(key); - kfree(scb); - _leave(" = 0"); - return 0; - -error_key: - key_put(key); -error_scb: - kfree(scb); -error: - d_drop(dentry); - _leave(" = %d", ret); - return ret; + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->dentry = dentry; + op->create.mode = S_IFDIR | mode; + op->create.reason = afs_edit_dir_for_mkdir; + op->ops = &afs_mkdir_operation; + return afs_do_sync_operation(op); } /* @@ -1344,79 +1365,96 @@ static void afs_dir_remove_subdir(struct dentry *dentry) if (d_really_is_positive(dentry)) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - clear_nlink(&vnode->vfs_inode); + clear_nlink(&vnode->netfs.inode); set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); } } +static void afs_rmdir_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + afs_update_dentry_version(op, &op->file[0], op->dentry); +} + +static void afs_rmdir_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + afs_dir_remove_subdir(op->dentry); + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_rmdir); + up_write(&dvnode->validate_lock); +} + +static void afs_rmdir_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->file[1].vnode) + up_write(&op->file[1].vnode->rmdir_lock); +} + +static const struct afs_operation_ops afs_rmdir_operation = { + .issue_afs_rpc = afs_fs_remove_dir, + .issue_yfs_rpc = yfs_fs_remove_dir, + .success = afs_rmdir_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_rmdir_edit_dir, + .put = afs_rmdir_put, +}; + /* * remove a directory from an AFS filesystem */ static int afs_rmdir(struct inode *dir, struct dentry *dentry) { - struct afs_status_cb *scb; - struct afs_fs_cursor fc; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; - struct key *key; int ret; _enter("{%llx:%llu},{%pd}", dvnode->fid.vid, dvnode->fid.vnode, dentry); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error; - } + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + + op->dentry = dentry; + op->ops = &afs_rmdir_operation; /* Try to make sure we have a callback promise on the victim. */ if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); - ret = afs_validate(vnode, key); + ret = afs_validate(vnode, op->key); if (ret < 0) - goto error_key; + goto error; } if (vnode) { ret = down_write_killable(&vnode->rmdir_lock); if (ret < 0) - goto error_key; + goto error; + op->file[1].vnode = vnode; } - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_remove(&fc, vnode, dentry->d_name.name, true, scb); - } - - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, scb); - afs_update_dentry_version(&fc, dentry, scb); - ret = afs_end_vnode_operation(&fc); - if (ret == 0) { - afs_dir_remove_subdir(dentry); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_rmdir); - } - } + return afs_do_sync_operation(op); - if (vnode) - up_write(&vnode->rmdir_lock); -error_key: - key_put(key); error: - kfree(scb); - return ret; + return afs_put_operation(op); } /* @@ -1429,52 +1467,92 @@ error: * However, if we didn't have a callback promise outstanding, or it was * outstanding on a different server, then it won't break it either... */ -static int afs_dir_remove_link(struct afs_vnode *dvnode, struct dentry *dentry, - struct key *key) +static void afs_dir_remove_link(struct afs_operation *op) { - int ret = 0; + struct afs_vnode *dvnode = op->file[0].vnode; + struct afs_vnode *vnode = op->file[1].vnode; + struct dentry *dentry = op->dentry; + int ret; - if (d_really_is_positive(dentry)) { - struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + if (op->error != 0 || + (op->file[1].scb.have_status && op->file[1].scb.have_error)) + return; + if (d_really_is_positive(dentry)) + return; - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - /* Already done */ - } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - write_seqlock(&vnode->cb_lock); - drop_nlink(&vnode->vfs_inode); - if (vnode->vfs_inode.i_nlink == 0) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); - __afs_break_callback(vnode, afs_cb_break_for_unlink); - } - write_sequnlock(&vnode->cb_lock); - ret = 0; - } else { - afs_break_callback(vnode, afs_cb_break_for_unlink); + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { + /* Already done */ + } else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { + write_seqlock(&vnode->cb_lock); + drop_nlink(&vnode->netfs.inode); + if (vnode->netfs.inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + __afs_break_callback(vnode, afs_cb_break_for_unlink); + } + write_sequnlock(&vnode->cb_lock); + } else { + afs_break_callback(vnode, afs_cb_break_for_unlink); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - kdebug("AFS_VNODE_DELETED"); + if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) + _debug("AFS_VNODE_DELETED"); - ret = afs_validate(vnode, key); - if (ret == -ESTALE) - ret = 0; - } - _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); + ret = afs_validate(vnode, op->key); + if (ret != -ESTALE) + op->error = ret; } - return ret; + _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, op->error); +} + +static void afs_unlink_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[1]); + afs_update_dentry_version(op, &op->file[0], op->dentry); + afs_dir_remove_link(op); +} + +static void afs_unlink_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); +} + +static void afs_unlink_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->unlink.need_rehash && op->error < 0 && op->error != -ENOENT) + d_rehash(op->dentry); } +static const struct afs_operation_ops afs_unlink_operation = { + .issue_afs_rpc = afs_fs_remove_file, + .issue_yfs_rpc = yfs_fs_remove_file, + .success = afs_unlink_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_unlink_edit_dir, + .put = afs_unlink_put, +}; + /* * Remove a file or symlink from an AFS filesystem. */ static int afs_unlink(struct inode *dir, struct dentry *dentry) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - struct key *key; - bool need_rehash = false; int ret; _enter("{%llx:%llu},{%pd}", @@ -1483,255 +1561,209 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) if (dentry->d_name.len >= AFSNAMEMAX) return -ENAMETOOLONG; - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - goto error; + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; /* Try to make sure we have a callback promise on the victim. */ - ret = afs_validate(vnode, key); - if (ret < 0) - goto error_key; + ret = afs_validate(vnode, op->key); + if (ret < 0) { + op->error = ret; + goto error; + } spin_lock(&dentry->d_lock); if (d_count(dentry) > 1) { spin_unlock(&dentry->d_lock); /* Start asynchronous writeout of the inode */ write_inode_now(d_inode(dentry), 0); - ret = afs_sillyrename(dvnode, vnode, dentry, key); - goto error_key; + op->error = afs_sillyrename(dvnode, vnode, dentry, op->key); + goto error; } if (!d_unhashed(dentry)) { /* Prevent a race with RCU lookup. */ __d_drop(dentry); - need_rehash = true; + op->unlink.need_rehash = true; } spin_unlock(&dentry->d_lock); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - afs_dataversion_t data_version_2 = vnode->status.data_version; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); - - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && - !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { - yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, - &scb[0], &scb[1]); - if (fc.ac.error != -ECONNABORTED || - fc.ac.abort_code != RXGEN_OPCODE) - continue; - set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); - } + op->file[1].vnode = vnode; + op->file[1].update_ctime = true; + op->file[1].op_unlinked = true; + op->dentry = dentry; + op->ops = &afs_unlink_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); - afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]); - } - - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, &scb[0]); - afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, - &data_version_2, &scb[1]); - afs_update_dentry_version(&fc, dentry, &scb[0]); - ret = afs_end_vnode_operation(&fc); - if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) - ret = afs_dir_remove_link(dvnode, dentry, key); - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); } - if (need_rehash && ret < 0 && ret != -ENOENT) - d_rehash(dentry); + return afs_put_operation(op); -error_key: - key_put(key); -error_scb: - kfree(scb); error: - _leave(" = %d", ret); - return ret; + return afs_put_operation(op); } +static const struct afs_operation_ops afs_create_operation = { + .issue_afs_rpc = afs_fs_create_file, + .issue_yfs_rpc = yfs_fs_create_file, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + /* * create a regular file on an AFS filesystem */ -static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int afs_create(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { - struct afs_iget_data iget_data; - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct key *key; - int ret; + int ret = -ENAMETOOLONG; - mode |= S_IFREG; - - _enter("{%llx:%llu},{%pd},%ho,", + _enter("{%llx:%llu},{%pd},%ho", dvnode->fid.vid, dvnode->fid.vnode, dentry, mode); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); goto error; } - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - goto error_scb; - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_prep_for_new_inode(&fc, &iget_data); - afs_fs_create(&fc, dentry->d_name.name, mode, - &scb[0], &iget_data.fid, &scb[1]); - } - - afs_check_for_remote_deletion(&fc, dvnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, &scb[0]); - afs_update_dentry_version(&fc, dentry, &scb[0]); - afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); - ret = afs_end_vnode_operation(&fc); - if (ret < 0) - goto error_key; - } else { - goto error_key; - } - - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_create); + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; - kfree(scb); - key_put(key); - _leave(" = 0"); - return 0; + op->dentry = dentry; + op->create.mode = S_IFREG | mode; + op->create.reason = afs_edit_dir_for_create; + op->ops = &afs_create_operation; + return afs_do_sync_operation(op); -error_scb: - kfree(scb); -error_key: - key_put(key); error: d_drop(dentry); _leave(" = %d", ret); return ret; } +static void afs_link_success(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; + + _enter("op=%08x", op->debug_id); + op->ctime = dvp->scb.status.mtime_client; + afs_vnode_commit_status(op, dvp); + afs_vnode_commit_status(op, vp); + afs_update_dentry_version(op, dvp, op->dentry); + if (op->dentry_2->d_parent == op->dentry->d_parent) + afs_update_dentry_version(op, dvp, op->dentry_2); + ihold(&vp->vnode->netfs.inode); + d_instantiate(op->dentry, &vp->vnode->netfs.inode); +} + +static void afs_link_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->error) + d_drop(op->dentry); +} + +static const struct afs_operation_ops afs_link_operation = { + .issue_afs_rpc = afs_fs_link, + .issue_yfs_rpc = yfs_fs_link, + .success = afs_link_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_link_put, +}; + /* * create a hard link between files in an AFS filesystem */ static int afs_link(struct dentry *from, struct inode *dir, struct dentry *dentry) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); - struct key *key; - int ret; + int ret = -ENAMETOOLONG; _enter("{%llx:%llu},{%llx:%llu},{%pd}", vnode->fid.vid, vnode->fid.vnode, dvnode->fid.vid, dvnode->fid.vnode, dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); goto error; - - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - - if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { - afs_end_vnode_operation(&fc); - goto error_key; - } - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); - afs_fs_link(&fc, vnode, dentry->d_name.name, - &scb[0], &scb[1]); - } - - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, &scb[0]); - afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, - NULL, &scb[1]); - ihold(&vnode->vfs_inode); - afs_update_dentry_version(&fc, dentry, &scb[0]); - d_instantiate(dentry, &vnode->vfs_inode); - - mutex_unlock(&vnode->io_lock); - ret = afs_end_vnode_operation(&fc); - if (ret < 0) - goto error_key; - } else { - goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, - afs_edit_dir_for_link); - - key_put(key); - kfree(scb); - _leave(" = 0"); - return 0; - -error_key: - key_put(key); -error_scb: - kfree(scb); + ret = afs_validate(vnode, op->key); + if (ret < 0) + goto error_op; + + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, vnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; + + op->dentry = dentry; + op->dentry_2 = from; + op->ops = &afs_link_operation; + op->create.reason = afs_edit_dir_for_link; + return afs_do_sync_operation(op); + +error_op: + afs_put_operation(op); error: d_drop(dentry); _leave(" = %d", ret); return ret; } +static const struct afs_operation_ops afs_symlink_operation = { + .issue_afs_rpc = afs_fs_symlink, + .issue_yfs_rpc = yfs_fs_symlink, + .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_create_edit_dir, + .put = afs_create_put, +}; + /* * create a symlink in an AFS filesystem */ -static int afs_symlink(struct inode *dir, struct dentry *dentry, - const char *content) +static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, const char *content) { - struct afs_iget_data iget_data; - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *dvnode = AFS_FS_I(dir); - struct key *key; int ret; _enter("{%llx:%llu},{%pd},%s", @@ -1746,73 +1778,130 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, if (strlen(content) >= AFSPATHMAX) goto error; - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); goto error; - - key = afs_request_key(dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_prep_for_new_inode(&fc, &iget_data); - afs_fs_symlink(&fc, dentry->d_name.name, content, - &scb[0], &iget_data.fid, &scb[1]); - } - - afs_check_for_remote_deletion(&fc, dvnode); - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &data_version, &scb[0]); - afs_update_dentry_version(&fc, dentry, &scb[0]); - afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); - ret = afs_end_vnode_operation(&fc); - if (ret < 0) - goto error_key; - } else { - goto error_key; } - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, - afs_edit_dir_for_symlink); + afs_op_set_vnode(op, 0, dvnode); + op->file[0].dv_delta = 1; - key_put(key); - kfree(scb); - _leave(" = 0"); - return 0; + op->dentry = dentry; + op->ops = &afs_symlink_operation; + op->create.reason = afs_edit_dir_for_symlink; + op->create.symlink = content; + return afs_do_sync_operation(op); -error_key: - key_put(key); -error_scb: - kfree(scb); error: d_drop(dentry); _leave(" = %d", ret); return ret; } +static void afs_rename_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[1]); + afs_vnode_commit_status(op, &op->file[0]); + if (op->file[1].vnode != op->file[0].vnode) { + op->ctime = op->file[1].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[1]); + } +} + +static void afs_rename_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + struct afs_vnode *orig_dvnode = orig_dvp->vnode; + struct afs_vnode *new_dvnode = new_dvp->vnode; + struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry)); + struct dentry *old_dentry = op->dentry; + struct dentry *new_dentry = op->dentry_2; + struct inode *new_inode; + + _enter("op=%08x", op->debug_id); + + if (op->rename.rehash) { + d_rehash(op->rename.rehash); + op->rename.rehash = NULL; + } + + down_write(&orig_dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) && + orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta) + afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, + afs_edit_dir_for_rename_0); + + if (new_dvnode != orig_dvnode) { + up_write(&orig_dvnode->validate_lock); + down_write(&new_dvnode->validate_lock); + } + + if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) && + new_dvnode->status.data_version == new_dvp->dv_before + new_dvp->dv_delta) { + if (!op->rename.new_negative) + afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, + afs_edit_dir_for_rename_1); + + afs_edit_dir_add(new_dvnode, &new_dentry->d_name, + &vnode->fid, afs_edit_dir_for_rename_2); + } + + new_inode = d_inode(new_dentry); + if (new_inode) { + spin_lock(&new_inode->i_lock); + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else if (new_inode->i_nlink > 0) + drop_nlink(new_inode); + spin_unlock(&new_inode->i_lock); + } + + /* Now we can update d_fsdata on the dentries to reflect their + * new parent's data_version. + * + * Note that if we ever implement RENAME_EXCHANGE, we'll have + * to update both dentries with opposing dir versions. + */ + afs_update_dentry_version(op, new_dvp, op->dentry); + afs_update_dentry_version(op, new_dvp, op->dentry_2); + + d_move(old_dentry, new_dentry); + + up_write(&new_dvnode->validate_lock); +} + +static void afs_rename_put(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + if (op->rename.rehash) + d_rehash(op->rename.rehash); + dput(op->rename.tmp); + if (op->error) + d_rehash(op->dentry); +} + +static const struct afs_operation_ops afs_rename_operation = { + .issue_afs_rpc = afs_fs_rename, + .issue_yfs_rpc = yfs_fs_rename, + .success = afs_rename_success, + .edit_dir = afs_rename_edit_dir, + .put = afs_rename_put, +}; + /* * rename a file in an AFS filesystem and/or move it between directories */ -static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *orig_dvnode, *new_dvnode, *vnode; - struct dentry *tmp = NULL, *rehash = NULL; - struct inode *new_inode; - struct key *key; - bool new_negative = d_is_negative(new_dentry); int ret; if (flags) @@ -1832,16 +1921,28 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dvnode->fid.vid, new_dvnode->fid.vnode, new_dentry); - ret = -ENOMEM; - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) + op = afs_alloc_operation(NULL, orig_dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); + + ret = afs_validate(vnode, op->key); + op->error = ret; + if (ret < 0) goto error; - key = afs_request_key(orig_dvnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } + afs_op_set_vnode(op, 0, orig_dvnode); + afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */ + op->file[0].dv_delta = 1; + op->file[1].dv_delta = 1; + op->file[0].modification = true; + op->file[1].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; + + op->dentry = old_dentry; + op->dentry_2 = new_dentry; + op->rename.new_negative = d_is_negative(new_dentry); + op->ops = &afs_rename_operation; /* For non-directories, check whether the target is busy and if so, * make a copy of the dentry and then do a silly-rename. If the @@ -1854,26 +1955,29 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, */ if (!d_unhashed(new_dentry)) { d_drop(new_dentry); - rehash = new_dentry; + op->rename.rehash = new_dentry; } if (d_count(new_dentry) > 2) { /* copy the target dentry's name */ - ret = -ENOMEM; - tmp = d_alloc(new_dentry->d_parent, - &new_dentry->d_name); - if (!tmp) - goto error_rehash; + op->rename.tmp = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!op->rename.tmp) { + op->error = -ENOMEM; + goto error; + } ret = afs_sillyrename(new_dvnode, AFS_FS_I(d_inode(new_dentry)), - new_dentry, key); - if (ret) - goto error_rehash; + new_dentry, op->key); + if (ret) { + op->error = ret; + goto error; + } - new_dentry = tmp; - rehash = NULL; - new_negative = true; + op->dentry_2 = op->rename.tmp; + op->rename.rehash = NULL; + op->rename.new_negative = true; } } @@ -1888,142 +1992,47 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, */ d_drop(old_dentry); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { - afs_dataversion_t orig_data_version; - afs_dataversion_t new_data_version; - struct afs_status_cb *new_scb = &scb[1]; - - orig_data_version = orig_dvnode->status.data_version + 1; + return afs_do_sync_operation(op); - if (orig_dvnode != new_dvnode) { - if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { - afs_end_vnode_operation(&fc); - goto error_rehash_old; - } - new_data_version = new_dvnode->status.data_version + 1; - } else { - new_data_version = orig_data_version; - new_scb = &scb[0]; - } - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode); - fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); - afs_fs_rename(&fc, old_dentry->d_name.name, - new_dvnode, new_dentry->d_name.name, - &scb[0], new_scb); - } - - afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break, - &orig_data_version, &scb[0]); - if (new_dvnode != orig_dvnode) { - afs_vnode_commit_status(&fc, new_dvnode, fc.cb_break_2, - &new_data_version, &scb[1]); - mutex_unlock(&new_dvnode->io_lock); - } - ret = afs_end_vnode_operation(&fc); - if (ret < 0) - goto error_rehash_old; - } - - if (ret == 0) { - if (rehash) - d_rehash(rehash); - if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) - afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, - afs_edit_dir_for_rename_0); - - if (!new_negative && - test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) - afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, - afs_edit_dir_for_rename_1); - - if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) - afs_edit_dir_add(new_dvnode, &new_dentry->d_name, - &vnode->fid, afs_edit_dir_for_rename_2); - - new_inode = d_inode(new_dentry); - if (new_inode) { - spin_lock(&new_inode->i_lock); - if (new_inode->i_nlink > 0) - drop_nlink(new_inode); - spin_unlock(&new_inode->i_lock); - } - - /* Now we can update d_fsdata on the dentries to reflect their - * new parent's data_version. - * - * Note that if we ever implement RENAME_EXCHANGE, we'll have - * to update both dentries with opposing dir versions. - */ - if (new_dvnode != orig_dvnode) { - afs_update_dentry_version(&fc, old_dentry, &scb[1]); - afs_update_dentry_version(&fc, new_dentry, &scb[1]); - } else { - afs_update_dentry_version(&fc, old_dentry, &scb[0]); - afs_update_dentry_version(&fc, new_dentry, &scb[0]); - } - d_move(old_dentry, new_dentry); - goto error_tmp; - } - -error_rehash_old: - d_rehash(new_dentry); -error_rehash: - if (rehash) - d_rehash(rehash); -error_tmp: - if (tmp) - dput(tmp); - key_put(key); -error_scb: - kfree(scb); error: - _leave(" = %d", ret); - return ret; + return afs_put_operation(op); } /* - * Release a directory page and clean up its private state if it's not busy - * - return true if the page can now be released, false if not + * Release a directory folio and clean up its private state if it's not busy + * - return true if the folio can now be released, false if not */ -static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags) +static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags) { - struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio)); - _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index); + _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio)); - set_page_private(page, 0); - ClearPagePrivate(page); + folio_detach_private(folio); /* The directory will need reloading. */ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_stat_v(dvnode, n_relpg); - return 1; + return true; } /* - * invalidate part or all of a page - * - release a page and clean up its private data if offset is 0 (indicating - * the entire page) + * Invalidate part or all of a folio. */ -static void afs_dir_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void afs_dir_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host); + struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio)); - _enter("{%lu},%u,%u", page->index, offset, length); + _enter("{%lu},%zu,%zu", folio->index, offset, length); - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); /* The directory will need reloading. */ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) afs_stat_v(dvnode, n_inval); - /* we clean up only if the entire page is being invalidated */ - if (offset == 0 && length == PAGE_SIZE) { - set_page_private(page, 0); - ClearPagePrivate(page); - } + /* we clean up only if the entire folio is being invalidated */ + if (offset == 0 && length == folio_size(folio)) + folio_detach_private(folio); } diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index b108528bf010..0ab7752d1b75 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -105,6 +105,25 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block, } /* + * Get a new directory folio. + */ +static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index) +{ + struct address_space *mapping = vnode->netfs.inode.i_mapping; + struct folio *folio; + + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping->gfp_mask); + if (!folio) + clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + else if (folio && !folio_test_private(folio)) + folio_attach_private(folio, (void *)1); + + return folio; +} + +/* * Scan a directory block looking for a dirent of the right name. */ static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name, @@ -188,74 +207,60 @@ void afs_edit_dir_add(struct afs_vnode *vnode, enum afs_edit_dir_reason why) { union afs_xdr_dir_block *meta, *block; - struct afs_xdr_dir_page *meta_page, *dir_page; union afs_xdr_dirent *de; - struct page *page0, *page; + struct folio *folio0, *folio; unsigned int need_slots, nr_blocks, b; pgoff_t index; loff_t i_size; - gfp_t gfp; int slot; _enter(",,{%d,%s},", name->len, name->name); - i_size = i_size_read(&vnode->vfs_inode); + i_size = i_size_read(&vnode->netfs.inode); if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); return; } - gfp = vnode->vfs_inode.i_mapping->gfp_mask; - page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp); - if (!page0) { - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + folio0 = afs_dir_get_folio(vnode, 0); + if (!folio0) { _leave(" [fgp]"); return; } /* Work out how many slots we're going to need. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); - meta_page = kmap(page0); - meta = &meta_page->blocks[0]; + meta = kmap_local_folio(folio0, 0); if (i_size == 0) goto new_directory; nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; - /* Find a block that has sufficient slots available. Each VM page + /* Find a block that has sufficient slots available. Each folio * contains two or more directory blocks. */ for (b = 0; b < nr_blocks + 1; b++) { - /* If the directory extended into a new page, then we need to - * tack a new page on the end. + /* If the directory extended into a new folio, then we need to + * tack a new folio on the end. */ index = b / AFS_DIR_BLOCKS_PER_PAGE; - if (index == 0) { - page = page0; - dir_page = meta_page; - } else { - if (nr_blocks >= AFS_DIR_MAX_BLOCKS) - goto error; - gfp = vnode->vfs_inode.i_mapping->gfp_mask; - page = find_or_create_page(vnode->vfs_inode.i_mapping, - index, gfp); - if (!page) + if (nr_blocks >= AFS_DIR_MAX_BLOCKS) + goto error; + if (index >= folio_nr_pages(folio0)) { + folio = afs_dir_get_folio(vnode, index); + if (!folio) goto error; - if (!PagePrivate(page)) { - set_page_private(page, 1); - SetPagePrivate(page); - } - dir_page = kmap(page); + } else { + folio = folio0; } + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) goto invalidated; - block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; - _debug("block %u: %2u %3u %u", b, (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99, @@ -266,10 +271,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode, if (b == nr_blocks) { _debug("init %u", b); afs_edit_init_block(meta, block, b); - i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE); + afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE); } - /* Only lower dir pages have a counter in the header. */ + /* Only lower dir blocks have a counter in the header. */ if (b >= AFS_DIR_BLOCKS_WITH_CTR || meta->meta.alloc_ctrs[b] >= need_slots) { /* We need to try and find one or more consecutive @@ -282,10 +287,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode, } } - if (page != page0) { - unlock_page(page); - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } } @@ -299,10 +304,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode, new_directory: afs_edit_init_block(meta, meta, 0); i_size = AFS_DIR_BLOCK_SIZE; - i_size_write(&vnode->vfs_inode, i_size); + afs_set_i_size(vnode, i_size); slot = AFS_DIR_RESV_BLOCKS0; - page = page0; - block = meta; + folio = folio0; + block = kmap_local_folio(folio, 0); nr_blocks = 1; b = 0; @@ -321,33 +326,34 @@ found_space: /* Adjust the bitmap. */ afs_set_contig_bits(block, slot, need_slots); - if (page != page0) { - unlock_page(page); - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } /* Adjust the allocation counter. */ if (b < AFS_DIR_BLOCKS_WITH_CTR) meta->meta.alloc_ctrs[b] -= need_slots; - inode_inc_iversion_raw(&vnode->vfs_inode); + inode_inc_iversion_raw(&vnode->netfs.inode); afs_stat_v(vnode, n_dir_cr); _debug("Insert %s in %u[%u]", name->name, b, slot); out_unmap: - unlock_page(page0); - kunmap(page0); - put_page(page0); + kunmap_local(meta); + folio_unlock(folio0); + folio_put(folio0); _leave(""); return; invalidated: trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name); clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); - if (page != page0) { - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } goto out_unmap; @@ -367,10 +373,9 @@ error: void afs_edit_dir_remove(struct afs_vnode *vnode, struct qstr *name, enum afs_edit_dir_reason why) { - struct afs_xdr_dir_page *meta_page, *dir_page; union afs_xdr_dir_block *meta, *block; union afs_xdr_dirent *de; - struct page *page0, *page; + struct folio *folio0, *folio; unsigned int need_slots, nr_blocks, b; pgoff_t index; loff_t i_size; @@ -378,7 +383,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, _enter(",,{%d,%s},", name->len, name->name); - i_size = i_size_read(&vnode->vfs_inode); + i_size = i_size_read(&vnode->netfs.inode); if (i_size < AFS_DIR_BLOCK_SIZE || i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS || (i_size & (AFS_DIR_BLOCK_SIZE - 1))) { @@ -387,41 +392,36 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, } nr_blocks = i_size / AFS_DIR_BLOCK_SIZE; - page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0); - if (!page0) { - clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); + folio0 = afs_dir_get_folio(vnode, 0); + if (!folio0) { _leave(" [fgp]"); return; } /* Work out how many slots we're going to discard. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); - meta_page = kmap(page0); - meta = &meta_page->blocks[0]; + meta = kmap_local_folio(folio0, 0); - /* Find a page that has sufficient slots available. Each VM page + /* Find a block that has sufficient slots available. Each folio * contains two or more directory blocks. */ for (b = 0; b < nr_blocks; b++) { index = b / AFS_DIR_BLOCKS_PER_PAGE; - if (index != 0) { - page = find_lock_page(vnode->vfs_inode.i_mapping, index); - if (!page) + if (index >= folio_nr_pages(folio0)) { + folio = afs_dir_get_folio(vnode, index); + if (!folio) goto error; - dir_page = kmap(page); } else { - page = page0; - dir_page = meta_page; + folio = folio0; } + block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio)); + /* Abandon the edit if we got a callback break. */ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) goto invalidated; - block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE]; - if (b > AFS_DIR_BLOCKS_WITH_CTR || meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) { slot = afs_dir_scan_block(block, name, b); @@ -429,10 +429,10 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, goto found_dirent; } - if (page != page0) { - unlock_page(page); - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } } @@ -453,24 +453,24 @@ found_dirent: /* Adjust the bitmap. */ afs_clear_contig_bits(block, slot, need_slots); - if (page != page0) { - unlock_page(page); - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } /* Adjust the allocation counter. */ if (b < AFS_DIR_BLOCKS_WITH_CTR) meta->meta.alloc_ctrs[b] += need_slots; - inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version); + inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version); afs_stat_v(vnode, n_dir_rm); _debug("Remove %s from %u[%u]", name->name, b, slot); out_unmap: - unlock_page(page0); - kunmap(page0); - put_page(page0); + kunmap_local(meta); + folio_unlock(folio0); + folio_put(folio0); _leave(""); return; @@ -478,10 +478,10 @@ invalidated: trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval, 0, 0, 0, 0, name->name); clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags); - if (page != page0) { - unlock_page(page); - kunmap(page); - put_page(page); + kunmap_local(block); + if (folio != folio0) { + folio_unlock(folio); + folio_put(folio); } goto out_unmap; diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index 361088a5edb9..bb5807e87fa4 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -12,6 +12,48 @@ #include <linux/fsnotify.h> #include "internal.h" +static void afs_silly_rename_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); +} + +static void afs_silly_rename_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry)); + struct dentry *old = op->dentry; + struct dentry *new = op->dentry_2; + + spin_lock(&old->d_lock); + old->d_flags |= DCACHE_NFSFS_RENAMED; + spin_unlock(&old->d_lock); + if (dvnode->silly_key != op->key) { + key_put(dvnode->silly_key); + dvnode->silly_key = key_get(op->key); + } + + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) { + afs_edit_dir_remove(dvnode, &old->d_name, + afs_edit_dir_for_silly_0); + afs_edit_dir_add(dvnode, &new->d_name, + &vnode->fid, afs_edit_dir_for_silly_1); + } + up_write(&dvnode->validate_lock); +} + +static const struct afs_operation_ops afs_silly_rename_operation = { + .issue_afs_rpc = afs_fs_rename, + .issue_yfs_rpc = yfs_fs_rename, + .success = afs_silly_rename_success, + .edit_dir = afs_silly_rename_edit_dir, +}; + /* * Actually perform the silly rename step. */ @@ -19,56 +61,33 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode struct dentry *old, struct dentry *new, struct key *key) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; - int ret = -ERESTARTSYS; + struct afs_operation *op; _enter("%pd,%pd", old, new); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(key, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - trace_afs_silly_rename(vnode, false); - if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - afs_fs_rename(&fc, old->d_name.name, - dvnode, new->d_name.name, - scb, scb); - } + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, dvnode); + op->file[0].dv_delta = 1; + op->file[1].dv_delta = 1; + op->file[0].modification = true; + op->file[1].modification = true; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &dir_data_version, scb); - ret = afs_end_vnode_operation(&fc); - } - - if (ret == 0) { - spin_lock(&old->d_lock); - old->d_flags |= DCACHE_NFSFS_RENAMED; - spin_unlock(&old->d_lock); - if (dvnode->silly_key != key) { - key_put(dvnode->silly_key); - dvnode->silly_key = key_get(key); - } - - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &old->d_name, - afs_edit_dir_for_silly_0); - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_add(dvnode, &new->d_name, - &vnode->fid, afs_edit_dir_for_silly_1); - } + op->dentry = old; + op->dentry_2 = new; + op->ops = &afs_silly_rename_operation; - kfree(scb); - _leave(" = %d", ret); - return ret; + trace_afs_silly_rename(vnode, false); + return afs_do_sync_operation(op); } -/** - * afs_sillyrename - Perform a silly-rename of a dentry +/* + * Perform silly-rename of a dentry. * * AFS is stateless and the server doesn't know when the client is holding a * file open. To prevent application problems when a file is unlinked while @@ -112,12 +131,13 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, goto out; } while (!d_is_negative(sdentry)); - ihold(&vnode->vfs_inode); + ihold(&vnode->netfs.inode); ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key); switch (ret) { case 0: /* The rename succeeded. */ + set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags); d_move(dentry, sdentry); break; case -ERESTARTSYS: @@ -128,68 +148,85 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, d_drop(sdentry); } - iput(&vnode->vfs_inode); + iput(&vnode->netfs.inode); dput(sdentry); out: _leave(" = %d", ret); return ret; } +static void afs_silly_unlink_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + afs_check_dir_conflict(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[0]); + afs_vnode_commit_status(op, &op->file[1]); + afs_update_dentry_version(op, &op->file[0], op->dentry); +} + +static void afs_silly_unlink_edit_dir(struct afs_operation *op) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode *dvnode = dvp->vnode; + + _enter("op=%08x", op->debug_id); + down_write(&dvnode->validate_lock); + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) && + dvnode->status.data_version == dvp->dv_before + dvp->dv_delta) + afs_edit_dir_remove(dvnode, &op->dentry->d_name, + afs_edit_dir_for_unlink); + up_write(&dvnode->validate_lock); +} + +static const struct afs_operation_ops afs_silly_unlink_operation = { + .issue_afs_rpc = afs_fs_remove_file, + .issue_yfs_rpc = yfs_fs_remove_file, + .success = afs_silly_unlink_success, + .aborted = afs_check_for_remote_deletion, + .edit_dir = afs_silly_unlink_edit_dir, +}; + /* * Tell the server to remove a sillyrename file. */ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode, struct dentry *dentry, struct key *key) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; - int ret = -ERESTARTSYS; + struct afs_operation *op; _enter(""); - scb = kcalloc(2, sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(NULL, dvnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - trace_afs_silly_rename(vnode, true); - if (afs_begin_vnode_operation(&fc, dvnode, key, false)) { - afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(dvnode); - - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) && - !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) { - yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name, - &scb[0], &scb[1]); - if (fc.ac.error != -ECONNABORTED || - fc.ac.abort_code != RXGEN_OPCODE) - continue; - set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags); - } - - afs_fs_remove(&fc, vnode, dentry->d_name.name, false, &scb[0]); - } + afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, vnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->file[0].update_ctime = true; + op->file[1].op_unlinked = true; + op->file[1].update_ctime = true; - afs_vnode_commit_status(&fc, dvnode, fc.cb_break, - &dir_data_version, &scb[0]); - ret = afs_end_vnode_operation(&fc); - if (ret == 0) { - drop_nlink(&vnode->vfs_inode); - if (vnode->vfs_inode.i_nlink == 0) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } - } - if (ret == 0 && - test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - afs_edit_dir_remove(dvnode, &dentry->d_name, - afs_edit_dir_for_unlink); + op->dentry = dentry; + op->ops = &afs_silly_unlink_operation; + + trace_afs_silly_rename(vnode, true); + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); } - kfree(scb); - _leave(" = %d", ret); - return ret; + return afs_put_operation(op); } /* diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 7503899c0a1b..d7d9402ff718 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -10,6 +10,100 @@ #include <linux/dns_resolver.h> #include "internal.h" +static atomic_t afs_autocell_ino; + +/* + * iget5() comparator for inode created by autocell operations + * + * These pseudo inodes don't match anything. + */ +static int afs_iget5_pseudo_test(struct inode *inode, void *opaque) +{ + return 0; +} + +/* + * iget5() inode initialiser + */ +static int afs_iget5_pseudo_set(struct inode *inode, void *opaque) +{ + struct afs_super_info *as = AFS_FS_S(inode->i_sb); + struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_fid *fid = opaque; + + vnode->volume = as->volume; + vnode->fid = *fid; + inode->i_ino = fid->vnode; + inode->i_generation = fid->unique; + return 0; +} + +/* + * Create an inode for a dynamic root directory or an autocell dynamic + * automount dir. + */ +struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) +{ + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_vnode *vnode; + struct inode *inode; + struct afs_fid fid = {}; + + _enter(""); + + if (as->volume) + fid.vid = as->volume->vid; + if (root) { + fid.vnode = 1; + fid.unique = 1; + } else { + fid.vnode = atomic_inc_return(&afs_autocell_ino); + fid.unique = 0; + } + + inode = iget5_locked(sb, fid.vnode, + afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid); + if (!inode) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", + inode, inode->i_ino, fid.vid, fid.vnode, fid.unique); + + vnode = AFS_FS_I(inode); + + /* there shouldn't be an existing inode */ + BUG_ON(!(inode->i_state & I_NEW)); + + netfs_inode_init(&vnode->netfs, NULL); + inode->i_size = 0; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + if (root) { + inode->i_op = &afs_dynroot_inode_operations; + inode->i_fop = &simple_dir_operations; + } else { + inode->i_op = &afs_autocell_inode_operations; + } + set_nlink(inode, 2); + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); + inode->i_blocks = 0; + inode->i_generation = 0; + + set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); + if (!root) { + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + inode->i_flags |= S_AUTOMOUNT; + } + + inode->i_flags |= S_NOATIME; + unlock_new_inode(inode); + _leave(" = %p", inode); + return inode; +} + /* * Probe to see if a cell may exist. This prevents positive dentries from * being created unnecessarily. @@ -30,9 +124,9 @@ static int afs_probe_cell_name(struct dentry *dentry) len--; } - cell = afs_lookup_cell_rcu(net, name, len); + cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe); if (!IS_ERR(cell)) { - afs_put_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe); return 0; } @@ -86,7 +180,6 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry) struct afs_cell *cell; struct afs_net *net = afs_d2net(dentry); struct dentry *ret; - unsigned int seq = 0; char *name; int len; @@ -98,17 +191,13 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry) if (!name) goto out_p; - rcu_read_lock(); - do { - read_seqbegin_or_lock(&net->cells_lock, &seq); - cell = rcu_dereference_raw(net->ws_cell); - if (cell) { - len = cell->name_len; - memcpy(name, cell->name, len + 1); - } - } while (need_seqretry(&net->cells_lock, seq)); - done_seqretry(&net->cells_lock, seq); - rcu_read_unlock(); + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) { + len = cell->name_len; + memcpy(name, cell->name, len + 1); + } + up_read(&net->cells_lock); ret = ERR_PTR(-ENOENT); if (!cell) @@ -289,15 +378,17 @@ void afs_dynroot_depopulate(struct super_block *sb) net->dynroot_sb = NULL; mutex_unlock(&net->proc_cells_lock); - inode_lock(root->d_inode); + if (root) { + inode_lock(root->d_inode); - /* Remove all the pins for dirs created for manually added cells */ - list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { - if (subdir->d_fsdata) { - subdir->d_fsdata = NULL; - dput(subdir); + /* Remove all the pins for dirs created for manually added cells */ + list_for_each_entry_safe(subdir, tmp, &root->d_subdirs, d_child) { + if (subdir->d_fsdata) { + subdir->d_fsdata = NULL; + dput(subdir); + } } - } - inode_unlock(root->d_inode); + inode_unlock(root->d_inode); + } } diff --git a/fs/afs/file.c b/fs/afs/file.c index 8415733f7bc1..d1cfb235c4b9 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -14,25 +14,30 @@ #include <linux/gfp.h> #include <linux/task_io_accounting_ops.h> #include <linux/mm.h> +#include <linux/swap.h> +#include <linux/netfs.h> #include "internal.h" static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); -static int afs_readpage(struct file *file, struct page *page); -static void afs_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); -static int afs_releasepage(struct page *page, gfp_t gfp_flags); +static int afs_symlink_read_folio(struct file *file, struct folio *folio); +static void afs_invalidate_folio(struct folio *folio, size_t offset, + size_t length); +static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags); -static int afs_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages); +static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); +static void afs_vm_open(struct vm_area_struct *area); +static void afs_vm_close(struct vm_area_struct *area); +static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, - .read_iter = generic_file_read_iter, + .read_iter = afs_file_read_iter, .write_iter = afs_file_write, .mmap = afs_file_mmap, .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = afs_fsync, .lock = afs_lock, .flock = afs_flock, @@ -42,25 +47,32 @@ const struct inode_operations afs_file_inode_operations = { .getattr = afs_getattr, .setattr = afs_setattr, .permission = afs_permission, - .listxattr = afs_listxattr, }; -const struct address_space_operations afs_fs_aops = { - .readpage = afs_readpage, - .readpages = afs_readpages, - .set_page_dirty = afs_set_page_dirty, - .launder_page = afs_launder_page, - .releasepage = afs_releasepage, - .invalidatepage = afs_invalidatepage, +const struct address_space_operations afs_file_aops = { + .read_folio = netfs_read_folio, + .readahead = netfs_readahead, + .dirty_folio = afs_dirty_folio, + .launder_folio = afs_launder_folio, + .release_folio = afs_release_folio, + .invalidate_folio = afs_invalidate_folio, .write_begin = afs_write_begin, .write_end = afs_write_end, .writepage = afs_writepage, .writepages = afs_writepages, }; +const struct address_space_operations afs_symlink_aops = { + .read_folio = afs_symlink_read_folio, + .release_folio = afs_release_folio, + .invalidate_folio = afs_invalidate_folio, +}; + static const struct vm_operations_struct afs_vm_ops = { + .open = afs_vm_open, + .close = afs_vm_close, .fault = filemap_fault, - .map_pages = filemap_map_pages, + .map_pages = afs_vm_map_pages, .page_mkwrite = afs_page_mkwrite, }; @@ -69,7 +81,7 @@ static const struct vm_operations_struct afs_vm_ops = { */ void afs_put_wb_key(struct afs_wb_key *wbk) { - if (refcount_dec_and_test(&wbk->usage)) { + if (wbk && refcount_dec_and_test(&wbk->usage)) { key_put(wbk->key); kfree(wbk); } @@ -145,7 +157,9 @@ int afs_open(struct inode *inode, struct file *file) if (file->f_flags & O_TRUNC) set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); - + + fscache_use_cookie(afs_vnode_cache(vnode), file->f_mode & FMODE_WRITE); + file->private_data = af; _leave(" = 0"); return 0; @@ -164,8 +178,10 @@ error: */ int afs_release(struct inode *inode, struct file *file) { + struct afs_vnode_cache_aux aux; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_file *af = file->private_data; + loff_t i_size; int ret = 0; _enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode); @@ -176,6 +192,15 @@ int afs_release(struct inode *inode, struct file *file) file->private_data = NULL; if (af->wb) afs_put_wb_key(af->wb); + + if ((file->f_mode & FMODE_WRITE)) { + i_size = i_size_read(&vnode->netfs.inode); + afs_set_cache_aux(vnode, &aux); + fscache_unuse_cookie(afs_vnode_cache(vnode), &aux, &i_size); + } else { + fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL); + } + key_put(af->key); kfree(af); afs_prune_wb_keys(vnode); @@ -184,418 +209,253 @@ int afs_release(struct inode *inode, struct file *file) } /* + * Allocate a new read record. + */ +struct afs_read *afs_alloc_read(gfp_t gfp) +{ + struct afs_read *req; + + req = kzalloc(sizeof(struct afs_read), gfp); + if (req) + refcount_set(&req->usage, 1); + + return req; +} + +/* * Dispose of a ref to a read record. */ void afs_put_read(struct afs_read *req) { - int i; - if (refcount_dec_and_test(&req->usage)) { - if (req->pages) { - for (i = 0; i < req->nr_pages; i++) - if (req->pages[i]) - put_page(req->pages[i]); - if (req->pages != req->array) - kfree(req->pages); - } + if (req->cleanup) + req->cleanup(req); + key_put(req->key); kfree(req); } } -#ifdef CONFIG_AFS_FSCACHE -/* - * deal with notification that a page was read from the cache - */ -static void afs_file_readpage_read_complete(struct page *page, - void *data, - int error) +static void afs_fetch_data_notify(struct afs_operation *op) { - _enter("%p,%p,%d", page, data, error); + struct afs_read *req = op->fetch.req; + struct netfs_io_subrequest *subreq = req->subreq; + int error = op->error; + + if (error == -ECONNABORTED) + error = afs_abort_to_error(op->ac.abort_code); + req->error = error; + + if (subreq) { + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + netfs_subreq_terminated(subreq, error ?: req->actual_len, false); + req->subreq = NULL; + } else if (req->done) { + req->done(req); + } +} - /* if the read completes with an error, we just unlock the page and let - * the VM reissue the readpage */ - if (!error) - SetPageUptodate(page); - unlock_page(page); +static void afs_fetch_data_success(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + _enter("op=%08x", op->debug_id); + afs_vnode_commit_status(op, &op->file[0]); + afs_stat_v(vnode, n_fetches); + atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes); + afs_fetch_data_notify(op); +} + +static void afs_fetch_data_put(struct afs_operation *op) +{ + op->fetch.req->error = op->error; + afs_put_read(op->fetch.req); } -#endif + +static const struct afs_operation_ops afs_fetch_data_operation = { + .issue_afs_rpc = afs_fs_fetch_data, + .issue_yfs_rpc = yfs_fs_fetch_data, + .success = afs_fetch_data_success, + .aborted = afs_check_for_remote_deletion, + .failed = afs_fetch_data_notify, + .put = afs_fetch_data_put, +}; /* * Fetch file data from the volume. */ -int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *req) +int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; - int ret; + struct afs_operation *op; _enter("%s{%llx:%llu.%u},%x,,,", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, - key_serial(key)); - - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + key_serial(req->key)); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_fetch_data(&fc, scb, req); - } - - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); + op = afs_alloc_operation(req->key, vnode->volume); + if (IS_ERR(op)) { + if (req->subreq) + netfs_subreq_terminated(req->subreq, PTR_ERR(op), false); + return PTR_ERR(op); } - if (ret == 0) { - afs_stat_v(vnode, n_fetches); - atomic_long_add(req->actual_len, - &afs_v2net(vnode)->n_fetch_bytes); - } + afs_op_set_vnode(op, 0, vnode); - kfree(scb); - _leave(" = %d", ret); - return ret; + op->fetch.req = afs_get_read(req); + op->ops = &afs_fetch_data_operation; + return afs_do_sync_operation(op); } -/* - * read page from file, directory or symlink, given a key to use - */ -int afs_page_filler(void *data, struct page *page) +static void afs_issue_read(struct netfs_io_subrequest *subreq) { - struct inode *inode = page->mapping->host; - struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_read *req; - struct key *key = data; - int ret; - - _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); - - BUG_ON(!PageLocked(page)); - - ret = -ESTALE; - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - goto error; - - /* is it cached? */ -#ifdef CONFIG_AFS_FSCACHE - ret = fscache_read_or_alloc_page(vnode->cache, - page, - afs_file_readpage_read_complete, - NULL, - GFP_KERNEL); -#else - ret = -ENOBUFS; -#endif - switch (ret) { - /* read BIO submitted (page in cache) */ - case 0: - break; - - /* page not yet cached */ - case -ENODATA: - _debug("cache said ENODATA"); - goto go_on; - - /* page will not be cached */ - case -ENOBUFS: - _debug("cache said ENOBUFS"); - - /* fall through */ - default: - go_on: - req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); - if (!req) - goto enomem; - - /* We request a full page. If the page is a partial one at the - * end of the file, the server will return a short read and the - * unmarshalling code will clear the unfilled space. - */ - refcount_set(&req->usage, 1); - req->pos = (loff_t)page->index << PAGE_SHIFT; - req->len = PAGE_SIZE; - req->nr_pages = 1; - req->pages = req->array; - req->pages[0] = page; - get_page(page); - - /* read the contents of the file from the server into the - * page */ - ret = afs_fetch_data(vnode, key, req); - afs_put_read(req); - - if (ret < 0) { - if (ret == -ENOENT) { - _debug("got NOENT from server" - " - marking file deleted and stale"); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - ret = -ESTALE; - } - -#ifdef CONFIG_AFS_FSCACHE - fscache_uncache_page(vnode->cache, page); -#endif - BUG_ON(PageFsCache(page)); - - if (ret == -EINTR || - ret == -ENOMEM || - ret == -ERESTARTSYS || - ret == -EAGAIN) - goto error; - goto io_error; - } + struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode); + struct afs_read *fsreq; + + fsreq = afs_alloc_read(GFP_NOFS); + if (!fsreq) + return netfs_subreq_terminated(subreq, -ENOMEM, false); + + fsreq->subreq = subreq; + fsreq->pos = subreq->start + subreq->transferred; + fsreq->len = subreq->len - subreq->transferred; + fsreq->key = key_get(subreq->rreq->netfs_priv); + fsreq->vnode = vnode; + fsreq->iter = &fsreq->def_iter; + + iov_iter_xarray(&fsreq->def_iter, READ, + &fsreq->vnode->netfs.inode.i_mapping->i_pages, + fsreq->pos, fsreq->len); + + afs_fetch_data(fsreq->vnode, fsreq); + afs_put_read(fsreq); +} - SetPageUptodate(page); +static int afs_symlink_read_folio(struct file *file, struct folio *folio) +{ + struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host); + struct afs_read *fsreq; + int ret; - /* send the page to the cache */ -#ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page) && - fscache_write_page(vnode->cache, page, vnode->status.size, - GFP_KERNEL) != 0) { - fscache_uncache_page(vnode->cache, page); - BUG_ON(PageFsCache(page)); - } -#endif - unlock_page(page); - } + fsreq = afs_alloc_read(GFP_NOFS); + if (!fsreq) + return -ENOMEM; - _leave(" = 0"); - return 0; + fsreq->pos = folio_pos(folio); + fsreq->len = folio_size(folio); + fsreq->vnode = vnode; + fsreq->iter = &fsreq->def_iter; + iov_iter_xarray(&fsreq->def_iter, READ, &folio->mapping->i_pages, + fsreq->pos, fsreq->len); -io_error: - SetPageError(page); - goto error; -enomem: - ret = -ENOMEM; -error: - unlock_page(page); - _leave(" = %d", ret); + ret = afs_fetch_data(fsreq->vnode, fsreq); + if (ret == 0) + folio_mark_uptodate(folio); + folio_unlock(folio); return ret; } -/* - * read page from file, directory or symlink, given a file to nominate the key - * to be used - */ -static int afs_readpage(struct file *file, struct page *page) +static int afs_init_request(struct netfs_io_request *rreq, struct file *file) { - struct key *key; - int ret; - - if (file) { - key = afs_file_key(file); - ASSERT(key != NULL); - ret = afs_page_filler(key, page); - } else { - struct inode *inode = page->mapping->host; - key = afs_request_key(AFS_FS_S(inode->i_sb)->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - } else { - ret = afs_page_filler(key, page); - key_put(key); - } - } - return ret; + rreq->netfs_priv = key_get(afs_file_key(file)); + return 0; } -/* - * Make pages available as they're filled. - */ -static void afs_readpages_page_done(struct afs_read *req) +static int afs_begin_cache_operation(struct netfs_io_request *rreq) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = req->vnode; -#endif - struct page *page = req->pages[req->index]; - - req->pages[req->index] = NULL; - SetPageUptodate(page); + struct afs_vnode *vnode = AFS_FS_I(rreq->inode); - /* send the page to the cache */ -#ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page) && - fscache_write_page(vnode->cache, page, vnode->status.size, - GFP_KERNEL) != 0) { - fscache_uncache_page(vnode->cache, page); - BUG_ON(PageFsCache(page)); - } + return fscache_begin_read_operation(&rreq->cache_resources, + afs_vnode_cache(vnode)); +#else + return -ENOBUFS; #endif - unlock_page(page); - put_page(page); } -/* - * Read a contiguous set of pages. - */ -static int afs_readpages_one(struct file *file, struct address_space *mapping, - struct list_head *pages) +static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len, + struct folio **foliop, void **_fsdata) { - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct afs_read *req; - struct list_head *p; - struct page *first, *page; - struct key *key = afs_file_key(file); - pgoff_t index; - int ret, n, i; - - /* Count the number of contiguous pages at the front of the list. Note - * that the list goes prev-wards rather than next-wards. - */ - first = lru_to_page(pages); - index = first->index + 1; - n = 1; - for (p = first->lru.prev; p != pages; p = p->prev) { - page = list_entry(p, struct page, lru); - if (page->index != index) - break; - index++; - n++; - } - - req = kzalloc(struct_size(req, array, n), GFP_NOFS); - if (!req) - return -ENOMEM; + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - refcount_set(&req->usage, 1); - req->vnode = vnode; - req->page_done = afs_readpages_page_done; - req->pos = first->index; - req->pos <<= PAGE_SHIFT; - req->pages = req->array; - - /* Transfer the pages to the request. We add them in until one fails - * to add to the LRU and then we stop (as that'll make a hole in the - * contiguous run. - * - * Note that it's possible for the file size to change whilst we're - * doing this, but we rely on the server returning less than we asked - * for if the file shrank. We also rely on this to deal with a partial - * page at the end of the file. - */ - do { - page = lru_to_page(pages); - list_del(&page->lru); - index = page->index; - if (add_to_page_cache_lru(page, mapping, index, - readahead_gfp_mask(mapping))) { -#ifdef CONFIG_AFS_FSCACHE - fscache_uncache_page(vnode->cache, page); -#endif - put_page(page); - break; - } - - req->pages[req->nr_pages++] = page; - req->len += PAGE_SIZE; - } while (req->nr_pages < n); + return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0; +} - if (req->nr_pages == 0) { - kfree(req); - return 0; - } +static void afs_free_request(struct netfs_io_request *rreq) +{ + key_put(rreq->netfs_priv); +} - ret = afs_fetch_data(vnode, key, req); - if (ret < 0) - goto error; +const struct netfs_request_ops afs_req_ops = { + .init_request = afs_init_request, + .free_request = afs_free_request, + .begin_cache_operation = afs_begin_cache_operation, + .check_write_begin = afs_check_write_begin, + .issue_read = afs_issue_read, +}; - task_io_account_read(PAGE_SIZE * req->nr_pages); - afs_put_read(req); +int afs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + fscache_unpin_writeback(wbc, afs_vnode_cache(AFS_FS_I(inode))); return 0; - -error: - if (ret == -ENOENT) { - _debug("got NOENT from server" - " - marking file deleted and stale"); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - ret = -ESTALE; - } - - for (i = 0; i < req->nr_pages; i++) { - page = req->pages[i]; - if (page) { -#ifdef CONFIG_AFS_FSCACHE - fscache_uncache_page(vnode->cache, page); -#endif - SetPageError(page); - unlock_page(page); - } - } - - afs_put_read(req); - return ret; } /* - * read a set of pages + * Adjust the dirty region of the page on truncation or full invalidation, + * getting rid of the markers altogether if the region is entirely invalidated. */ -static int afs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void afs_invalidate_dirty(struct folio *folio, size_t offset, + size_t length) { - struct key *key = afs_file_key(file); - struct afs_vnode *vnode; - int ret = 0; - - _enter("{%d},{%lu},,%d", - key_serial(key), mapping->host->i_ino, nr_pages); - - ASSERT(key != NULL); - - vnode = AFS_FS_I(mapping->host); - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - _leave(" = -ESTALE"); - return -ESTALE; - } - - /* attempt to read as many of the pages as possible */ -#ifdef CONFIG_AFS_FSCACHE - ret = fscache_read_or_alloc_pages(vnode->cache, - mapping, - pages, - &nr_pages, - afs_file_readpage_read_complete, - NULL, - mapping_gfp_mask(mapping)); -#else - ret = -ENOBUFS; -#endif - - switch (ret) { - /* all pages are being read from the cache */ - case 0: - BUG_ON(!list_empty(pages)); - BUG_ON(nr_pages != 0); - _leave(" = 0 [reading all]"); - return 0; - - /* there were pages that couldn't be read from the cache */ - case -ENODATA: - case -ENOBUFS: - break; - - /* other error */ - default: - _leave(" = %d", ret); - return ret; - } + struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); + unsigned long priv; + unsigned int f, t, end = offset + length; - while (!list_empty(pages)) { - ret = afs_readpages_one(file, mapping, pages); - if (ret < 0) - break; - } + priv = (unsigned long)folio_get_private(folio); - _leave(" = %d [netting]", ret); - return ret; + /* we clean up only if the entire page is being invalidated */ + if (offset == 0 && length == folio_size(folio)) + goto full_invalidate; + + /* If the page was dirtied by page_mkwrite(), the PTE stays writable + * and we don't get another notification to tell us to expand it + * again. + */ + if (afs_is_folio_dirty_mmapped(priv)) + return; + + /* We may need to shorten the dirty region */ + f = afs_folio_dirty_from(folio, priv); + t = afs_folio_dirty_to(folio, priv); + + if (t <= offset || f >= end) + return; /* Doesn't overlap */ + + if (f < offset && t > end) + return; /* Splits the dirty region - just absorb it */ + + if (f >= offset && t <= end) + goto undirty; + + if (f < offset) + t = offset; + else + f = end; + if (f == t) + goto undirty; + + priv = afs_folio_dirty(folio, f, t); + folio_change_private(folio, (void *)priv); + trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio); + return; + +undirty: + trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio); + folio_clear_dirty_for_io(folio); +full_invalidate: + trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio); + folio_detach_private(folio); } /* @@ -603,35 +463,17 @@ static int afs_readpages(struct file *file, struct address_space *mapping, * - release a page and clean up its private data if offset is 0 (indicating * the entire page) */ -static void afs_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) +static void afs_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - unsigned long priv; - - _enter("{%lu},%u,%u", page->index, offset, length); - - BUG_ON(!PageLocked(page)); + _enter("{%lu},%zu,%zu", folio->index, offset, length); - /* we clean up only if the entire page is being invalidated */ - if (offset == 0 && length == PAGE_SIZE) { -#ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page)) { - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - fscache_wait_on_page_write(vnode->cache, page); - fscache_uncache_page(vnode->cache, page); - } -#endif + BUG_ON(!folio_test_locked(folio)); - if (PagePrivate(page)) { - priv = page_private(page); - trace_afs_page_dirty(vnode, tracepoint_string("inval"), - page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); - } - } + if (folio_get_private(folio)) + afs_invalidate_dirty(folio, offset, length); + folio_wait_fscache(folio); _leave(""); } @@ -639,35 +481,60 @@ static void afs_invalidatepage(struct page *page, unsigned int offset, * release a page and clean up its private state if it's not busy * - return true if the page can now be released, false if not */ -static int afs_releasepage(struct page *page, gfp_t gfp_flags) +static bool afs_release_folio(struct folio *folio, gfp_t gfp) { - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - unsigned long priv; + struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); _enter("{{%llx:%llu}[%lu],%lx},%x", - vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, - gfp_flags); + vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags, + gfp); - /* deny if page is being written to the cache and the caller hasn't + /* deny if folio is being written to the cache and the caller hasn't * elected to wait */ #ifdef CONFIG_AFS_FSCACHE - if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) { - _leave(" = F [cache busy]"); - return 0; + if (folio_test_fscache(folio)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + folio_wait_fscache(folio); } + fscache_note_page_release(afs_vnode_cache(vnode)); #endif - if (PagePrivate(page)) { - priv = page_private(page); - trace_afs_page_dirty(vnode, tracepoint_string("rel"), - page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); + if (folio_test_private(folio)) { + trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio); + folio_detach_private(folio); } - /* indicate that the page can be released */ + /* Indicate that the folio can be released */ _leave(" = T"); - return 1; + return true; +} + +static void afs_add_open_mmap(struct afs_vnode *vnode) +{ + if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { + down_write(&vnode->volume->cell->fs_open_mmaps_lock); + + if (list_empty(&vnode->cb_mmap_link)) + list_add_tail(&vnode->cb_mmap_link, + &vnode->volume->cell->fs_open_mmaps); + + up_write(&vnode->volume->cell->fs_open_mmaps_lock); + } +} + +static void afs_drop_open_mmap(struct afs_vnode *vnode) +{ + if (!atomic_dec_and_test(&vnode->cb_nr_mmap)) + return; + + down_write(&vnode->volume->cell->fs_open_mmaps_lock); + + if (atomic_read(&vnode->cb_nr_mmap) == 0) + list_del_init(&vnode->cb_mmap_link); + + up_write(&vnode->volume->cell->fs_open_mmaps_lock); + flush_work(&vnode->cb_work); } /* @@ -675,10 +542,57 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags) */ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); int ret; + afs_add_open_mmap(vnode); + ret = generic_file_mmap(file, vma); if (ret == 0) vma->vm_ops = &afs_vm_ops; + else + afs_drop_open_mmap(vnode); return ret; } + +static void afs_vm_open(struct vm_area_struct *vma) +{ + afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file))); +} + +static void afs_vm_close(struct vm_area_struct *vma) +{ + afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file))); +} + +static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file)); + struct afs_file *af = vmf->vma->vm_file->private_data; + + switch (afs_validate(vnode, af->key)) { + case 0: + return filemap_map_pages(vmf, start_pgoff, end_pgoff); + case -ENOMEM: + return VM_FAULT_OOM; + case -EINTR: + case -ERESTARTSYS: + return VM_FAULT_RETRY; + case -ESTALE: + default: + return VM_FAULT_SIGBUS; + } +} + +static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + struct afs_file *af = iocb->ki_filp->private_data; + int ret; + + ret = afs_validate(vnode, af->key); + if (ret < 0) + return ret; + + return generic_file_read_iter(iocb, iter); +} diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 0f2a94ba73cb..bbcc5afd1576 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -70,12 +70,13 @@ static void afs_schedule_lock_extension(struct afs_vnode *vnode) */ void afs_lock_op_done(struct afs_call *call) { - struct afs_vnode *vnode = call->lvnode; + struct afs_operation *op = call->op; + struct afs_vnode *vnode = op->file[0].vnode; if (call->error == 0) { spin_lock(&vnode->lock); trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0); - vnode->locked_at = call->reply_time; + vnode->locked_at = call->issue_time; afs_schedule_lock_extension(vnode); spin_unlock(&vnode->lock); } @@ -172,15 +173,26 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode) vnode->lock_key = NULL; } +static void afs_lock_success(struct afs_operation *op) +{ + _enter("op=%08x", op->debug_id); + afs_vnode_commit_status(op, &op->file[0]); +} + +static const struct afs_operation_ops afs_set_lock_operation = { + .issue_afs_rpc = afs_fs_set_lock, + .issue_yfs_rpc = yfs_fs_set_lock, + .success = afs_lock_success, + .aborted = afs_check_for_remote_deletion, +}; + /* * Get a lock on a file */ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, afs_lock_type_t type) { - struct afs_status_cb *scb; - struct afs_fs_cursor fc; - int ret; + struct afs_operation *op; _enter("%s{%llx:%llu.%u},%x,%u", vnode->volume->name, @@ -189,35 +201,29 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, vnode->fid.unique, key_serial(key), type); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_set_lock(&fc, type, scb); - } + afs_op_set_vnode(op, 0, vnode); - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); - ret = afs_end_vnode_operation(&fc); - } - - kfree(scb); - _leave(" = %d", ret); - return ret; + op->lock.type = type; + op->ops = &afs_set_lock_operation; + return afs_do_sync_operation(op); } +static const struct afs_operation_ops afs_extend_lock_operation = { + .issue_afs_rpc = afs_fs_extend_lock, + .issue_yfs_rpc = yfs_fs_extend_lock, + .success = afs_lock_success, +}; + /* * Extend a lock on a file */ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) { - struct afs_status_cb *scb; - struct afs_fs_cursor fc; - int ret; + struct afs_operation *op; _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, @@ -226,35 +232,29 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, false)) { - while (afs_select_current_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_extend_lock(&fc, scb); - } + afs_op_set_vnode(op, 0, vnode); - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); - ret = afs_end_vnode_operation(&fc); - } - - kfree(scb); - _leave(" = %d", ret); - return ret; + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_extend_lock_operation; + return afs_do_sync_operation(op); } +static const struct afs_operation_ops afs_release_lock_operation = { + .issue_afs_rpc = afs_fs_release_lock, + .issue_yfs_rpc = yfs_fs_release_lock, + .success = afs_lock_success, +}; + /* * Release a lock on a file */ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) { - struct afs_status_cb *scb; - struct afs_fs_cursor fc; - int ret; + struct afs_operation *op; _enter("%s{%llx:%llu.%u},%x", vnode->volume->name, @@ -263,25 +263,15 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) vnode->fid.unique, key_serial(key)); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, false)) { - while (afs_select_current_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_release_lock(&fc, scb); - } - - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, NULL, scb); - ret = afs_end_vnode_operation(&fc); - } + afs_op_set_vnode(op, 0, vnode); - kfree(scb); - _leave(" = %d", ret); - return ret; + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_release_lock_operation; + return afs_do_sync_operation(op); } /* @@ -386,7 +376,6 @@ again: spin_unlock(&vnode->lock); return; - /* Fall through */ default: /* Looks like a lock request was withdrawn. */ spin_unlock(&vnode->lock); @@ -783,10 +772,6 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) fl->fl_type, fl->fl_flags, (long long) fl->fl_start, (long long) fl->fl_end); - /* AFS doesn't support mandatory locks */ - if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK) - return -ENOLCK; - if (IS_GETLK(cmd)) return afs_do_getlk(file, fl); diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c new file mode 100644 index 000000000000..7a3803ce3a22 --- /dev/null +++ b/fs/afs/fs_operation.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Fileserver-directed operation handling. + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include "internal.h" + +static atomic_t afs_operation_debug_counter; + +/* + * Create an operation against a volume. + */ +struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *volume) +{ + struct afs_operation *op; + + _enter(""); + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) + return ERR_PTR(-ENOMEM); + + if (!key) { + key = afs_request_key(volume->cell); + if (IS_ERR(key)) { + kfree(op); + return ERR_CAST(key); + } + } else { + key_get(key); + } + + op->key = key; + op->volume = afs_get_volume(volume, afs_volume_trace_get_new_op); + op->net = volume->cell->net; + op->cb_v_break = volume->cb_v_break; + op->debug_id = atomic_inc_return(&afs_operation_debug_counter); + op->error = -EDESTADDRREQ; + op->ac.error = SHRT_MAX; + + _leave(" = [op=%08x]", op->debug_id); + return op; +} + +/* + * Lock the vnode(s) being operated upon. + */ +static bool afs_get_io_locks(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + struct afs_vnode *vnode2 = op->file[1].vnode; + + _enter(""); + + if (op->flags & AFS_OPERATION_UNINTR) { + mutex_lock(&vnode->io_lock); + op->flags |= AFS_OPERATION_LOCK_0; + _leave(" = t [1]"); + return true; + } + + if (!vnode2 || !op->file[1].need_io_lock || vnode == vnode2) + vnode2 = NULL; + + if (vnode2 > vnode) + swap(vnode, vnode2); + + if (mutex_lock_interruptible(&vnode->io_lock) < 0) { + op->error = -ERESTARTSYS; + op->flags |= AFS_OPERATION_STOP; + _leave(" = f [I 0]"); + return false; + } + op->flags |= AFS_OPERATION_LOCK_0; + + if (vnode2) { + if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) { + op->error = -ERESTARTSYS; + op->flags |= AFS_OPERATION_STOP; + mutex_unlock(&vnode->io_lock); + op->flags &= ~AFS_OPERATION_LOCK_0; + _leave(" = f [I 1]"); + return false; + } + op->flags |= AFS_OPERATION_LOCK_1; + } + + _leave(" = t [2]"); + return true; +} + +static void afs_drop_io_locks(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + struct afs_vnode *vnode2 = op->file[1].vnode; + + _enter(""); + + if (op->flags & AFS_OPERATION_LOCK_1) + mutex_unlock(&vnode2->io_lock); + if (op->flags & AFS_OPERATION_LOCK_0) + mutex_unlock(&vnode->io_lock); +} + +static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp, + unsigned int index) +{ + struct afs_vnode *vnode = vp->vnode; + + if (vnode) { + vp->fid = vnode->fid; + vp->dv_before = vnode->status.data_version; + vp->cb_break_before = afs_calc_vnode_cb_break(vnode); + if (vnode->lock_state != AFS_VNODE_LOCK_NONE) + op->flags |= AFS_OPERATION_CUR_ONLY; + if (vp->modification) + set_bit(AFS_VNODE_MODIFYING, &vnode->flags); + } + + if (vp->fid.vnode) + _debug("PREP[%u] {%llx:%llu.%u}", + index, vp->fid.vid, vp->fid.vnode, vp->fid.unique); +} + +/* + * Begin an operation on the fileserver. + * + * Fileserver operations are serialised on the server by vnode, so we serialise + * them here also using the io_lock. + */ +bool afs_begin_vnode_operation(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + ASSERT(vnode); + + _enter(""); + + if (op->file[0].need_io_lock) + if (!afs_get_io_locks(op)) + return false; + + afs_prepare_vnode(op, &op->file[0], 0); + afs_prepare_vnode(op, &op->file[1], 1); + op->cb_v_break = op->volume->cb_v_break; + _leave(" = true"); + return true; +} + +/* + * Tidy up a filesystem cursor and unlock the vnode. + */ +static void afs_end_vnode_operation(struct afs_operation *op) +{ + _enter(""); + + if (op->error == -EDESTADDRREQ || + op->error == -EADDRNOTAVAIL || + op->error == -ENETUNREACH || + op->error == -EHOSTUNREACH) + afs_dump_edestaddrreq(op); + + afs_drop_io_locks(op); + + if (op->error == -ECONNABORTED) + op->error = afs_abort_to_error(op->ac.abort_code); +} + +/* + * Wait for an in-progress operation to complete. + */ +void afs_wait_for_operation(struct afs_operation *op) +{ + _enter(""); + + while (afs_select_fileserver(op)) { + op->cb_s_break = op->server->cb_s_break; + if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && + op->ops->issue_yfs_rpc) + op->ops->issue_yfs_rpc(op); + else if (op->ops->issue_afs_rpc) + op->ops->issue_afs_rpc(op); + else + op->ac.error = -ENOTSUPP; + + if (op->call) + op->error = afs_wait_for_call_to_complete(op->call, &op->ac); + } + + switch (op->error) { + case 0: + _debug("success"); + op->ops->success(op); + break; + case -ECONNABORTED: + if (op->ops->aborted) + op->ops->aborted(op); + fallthrough; + default: + if (op->ops->failed) + op->ops->failed(op); + break; + } + + afs_end_vnode_operation(op); + + if (op->error == 0 && op->ops->edit_dir) { + _debug("edit_dir"); + op->ops->edit_dir(op); + } + _leave(""); +} + +/* + * Dispose of an operation. + */ +int afs_put_operation(struct afs_operation *op) +{ + int i, ret = op->error; + + _enter("op=%08x,%d", op->debug_id, ret); + + if (op->ops && op->ops->put) + op->ops->put(op); + if (op->file[0].modification) + clear_bit(AFS_VNODE_MODIFYING, &op->file[0].vnode->flags); + if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode) + clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags); + if (op->file[0].put_vnode) + iput(&op->file[0].vnode->netfs.inode); + if (op->file[1].put_vnode) + iput(&op->file[1].vnode->netfs.inode); + + if (op->more_files) { + for (i = 0; i < op->nr_files - 2; i++) + if (op->more_files[i].put_vnode) + iput(&op->more_files[i].vnode->netfs.inode); + kfree(op->more_files); + } + + afs_end_cursor(&op->ac); + afs_put_serverlist(op->net, op->server_list); + afs_put_volume(op->net, op->volume, afs_volume_trace_put_put_op); + key_put(op->key); + kfree(op); + return ret; +} + +int afs_do_sync_operation(struct afs_operation *op) +{ + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + return afs_put_operation(op); +} diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index cfe62b154f68..c0031a3ab42f 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* AFS fileserver probing * - * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ @@ -9,17 +9,89 @@ #include <linux/slab.h> #include "afs_fs.h" #include "internal.h" +#include "protocol_afs.h" #include "protocol_yfs.h" -static bool afs_fs_probe_done(struct afs_server *server) +static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ; +static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ; + +/* + * Start the probe polling timer. We have to supply it with an inc on the + * outstanding server count. + */ +static void afs_schedule_fs_probe(struct afs_net *net, + struct afs_server *server, bool fast) +{ + unsigned long atj; + + if (!net->live) + return; + + atj = server->probed_at; + atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval; + + afs_inc_servers_outstanding(net); + if (timer_reduce(&net->fs_probe_timer, atj)) + afs_dec_servers_outstanding(net); +} + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server) +{ + bool responded = server->probe.responded; + + write_seqlock(&net->fs_lock); + if (responded) { + list_add_tail(&server->probe_link, &net->fs_probe_slow); + } else { + server->rtt = UINT_MAX; + clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags); + list_add_tail(&server->probe_link, &net->fs_probe_fast); + } + write_sequnlock(&net->fs_lock); + + afs_schedule_fs_probe(net, server, !responded); +} + +/* + * Handle the completion of a probe. + */ +static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server) { - if (!atomic_dec_and_test(&server->probe_outstanding)) - return false; + _enter(""); + + if (atomic_dec_and_test(&server->probe_outstanding)) + afs_finished_fs_probe(net, server); - wake_up_var(&server->probe_outstanding); - clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags); - wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING); - return true; + wake_up_all(&server->probe_wq); +} + +/* + * Handle inability to send a probe due to ENOMEM when trying to allocate a + * call struct. + */ +static void afs_fs_probe_not_done(struct afs_net *net, + struct afs_server *server, + struct afs_addr_cursor *ac) +{ + struct afs_addr_list *alist = ac->alist; + unsigned int index = ac->index; + + _enter(""); + + trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail); + spin_lock(&server->probe_lock); + + server->probe.local_failure = true; + if (server->probe.error == 0) + server->probe.error = -ENOMEM; + + set_bit(index, &alist->failed); + + spin_unlock(&server->probe_lock); + return afs_done_one_fs_probe(net, server); } /* @@ -30,11 +102,8 @@ void afs_fileserver_probe_result(struct afs_call *call) { struct afs_addr_list *alist = call->alist; struct afs_server *server = call->server; - unsigned int server_index = call->server_index; unsigned int index = call->addr_ix; - unsigned int rtt = UINT_MAX; - bool have_result = false; - u64 _rtt; + unsigned int rtt_us = 0, cap0; int ret = call->error; _enter("%pU,%u", &server->uuid, index); @@ -53,8 +122,9 @@ void afs_fileserver_probe_result(struct afs_call *call) goto responded; case -ENOMEM: case -ENONET: + clear_bit(index, &alist->responded); server->probe.local_failure = true; - afs_io_error(call, afs_io_error_fs_probe_fail); + trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); goto out; case -ECONNRESET: /* Responded, but call expired. */ case -ERFKILL: @@ -73,12 +143,11 @@ void afs_fileserver_probe_result(struct afs_call *call) server->probe.error == -ETIMEDOUT || server->probe.error == -ETIME)) server->probe.error = ret; - afs_io_error(call, afs_io_error_fs_probe_fail); + trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail); goto out; } responded: - set_bit(index, &alist->responded); clear_bit(index, &alist->failed); if (call->service_id == YFS_FS_SERVICE) { @@ -91,105 +160,72 @@ responded: clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } + cap0 = ntohl(call->tmp); + if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES) + set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); + else + clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - /* Get the RTT and scale it to fit into a 32-bit value that represents - * over a minute of time so that we can access it with one instruction - * on a 32-bit system. - */ - _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - _rtt /= 64; - rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; - if (rtt < server->probe.rtt) { - server->probe.rtt = rtt; + if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && + rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; + server->rtt = rtt_us; alist->preferred = index; - have_result = true; } smp_wmb(); /* Set rtt before responded. */ server->probe.responded = true; - set_bit(AFS_SERVER_FL_PROBED, &server->flags); + set_bit(index, &alist->responded); + set_bit(AFS_SERVER_FL_RESPONDING, &server->flags); out: spin_unlock(&server->probe_lock); - _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", - server_index, index, &alist->addrs[index].transport, - (unsigned int)rtt, ret); + _debug("probe %pU [%u] %pISpc rtt=%u ret=%d", + &server->uuid, index, &alist->addrs[index].transport, + rtt_us, ret); - have_result |= afs_fs_probe_done(server); - if (have_result) { - server->probe.have_result = true; - wake_up_var(&server->probe.have_result); - wake_up_all(&server->probe_wq); - } + return afs_done_one_fs_probe(call->net, server); } /* - * Probe all of a fileserver's addresses to find out the best route and to - * query its capabilities. + * Probe one or all of a fileserver's addresses to find out the best route and + * to query its capabilities. */ -static int afs_do_probe_fileserver(struct afs_net *net, - struct afs_server *server, - struct key *key, - unsigned int server_index, - struct afs_error *_e) +void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server, + struct key *key, bool all) { struct afs_addr_cursor ac = { .index = 0, }; - struct afs_call *call; - bool in_progress = false; _enter("%pU", &server->uuid); read_lock(&server->fs_lock); ac.alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&server->fs_lock)); + afs_get_addrlist(ac.alist); read_unlock(&server->fs_lock); - atomic_set(&server->probe_outstanding, ac.alist->nr_addrs); + server->probed_at = jiffies; + atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1); memset(&server->probe, 0, sizeof(server->probe)); server->probe.rtt = UINT_MAX; - for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) { - call = afs_fs_get_capabilities(net, server, &ac, key, server_index); - if (!IS_ERR(call)) { - afs_put_call(call); - in_progress = true; - } else { - afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code); - } - } - - if (!in_progress) - afs_fs_probe_done(server); - return in_progress; -} + ac.index = ac.alist->preferred; + if (ac.index < 0 || ac.index >= ac.alist->nr_addrs) + all = true; -/* - * Send off probes to all unprobed servers. - */ -int afs_probe_fileservers(struct afs_net *net, struct key *key, - struct afs_server_list *list) -{ - struct afs_server *server; - struct afs_error e; - bool in_progress = false; - int i; - - e.error = 0; - e.responded = false; - for (i = 0; i < list->nr_servers; i++) { - server = list->servers[i].server; - if (test_bit(AFS_SERVER_FL_PROBED, &server->flags)) - continue; - - if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) && - afs_do_probe_fileserver(net, server, key, i, &e)) - in_progress = true; + if (all) { + for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) + if (!afs_fs_get_capabilities(net, server, &ac, key)) + afs_fs_probe_not_done(net, server, &ac); + } else { + if (!afs_fs_get_capabilities(net, server, &ac, key)) + afs_fs_probe_not_done(net, server, &ac); } - return in_progress ? 0 : e.error; + afs_put_addrlist(ac.alist); } /* @@ -199,7 +235,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) { struct wait_queue_entry *waits; struct afs_server *server; - unsigned int rtt = UINT_MAX; + unsigned int rtt = UINT_MAX, rtt_s; bool have_responders = false; int pref = -1, i; @@ -209,7 +245,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) for (i = 0; i < slist->nr_servers; i++) { if (test_bit(i, &untried)) { server = slist->servers[i].server; - if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + if (!atomic_read(&server->probe_outstanding)) __clear_bit(i, &untried); if (server->probe.responded) have_responders = true; @@ -239,7 +275,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) server = slist->servers[i].server; if (server->probe.responded) goto stop; - if (test_bit(AFS_SERVER_FL_PROBING, &server->flags)) + if (atomic_read(&server->probe_outstanding)) still_probing = true; } } @@ -255,10 +291,11 @@ stop: for (i = 0; i < slist->nr_servers; i++) { if (test_bit(i, &untried)) { server = slist->servers[i].server; - if (server->probe.responded && - server->probe.rtt < rtt) { + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { pref = i; - rtt = server->probe.rtt; + rtt = rtt_s; } remove_wait_queue(&server->probe_wq, &waits[i]); @@ -274,3 +311,165 @@ stop: slist->preferred = pref; return 0; } + +/* + * Probe timer. We have an increment on fs_outstanding that we need to pass + * along to the work item. + */ +void afs_fs_probe_timer(struct timer_list *timer) +{ + struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer); + + if (!net->live || !queue_work(afs_wq, &net->fs_prober)) + afs_dec_servers_outstanding(net); +} + +/* + * Dispatch a probe to a server. + */ +static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all) + __releases(&net->fs_lock) +{ + struct key *key = NULL; + + /* We remove it from the queues here - it will be added back to + * one of the queues on the completion of the probe. + */ + list_del_init(&server->probe_link); + + afs_get_server(server, afs_server_trace_get_probe); + write_sequnlock(&net->fs_lock); + + afs_fs_probe_fileserver(net, server, key, all); + afs_put_server(net, server, afs_server_trace_put_probe); +} + +/* + * Probe a server immediately without waiting for its due time to come + * round. This is used when all of the addresses have been tried. + */ +void afs_probe_fileserver(struct afs_net *net, struct afs_server *server) +{ + write_seqlock(&net->fs_lock); + if (!list_empty(&server->probe_link)) + return afs_dispatch_fs_probe(net, server, true); + write_sequnlock(&net->fs_lock); +} + +/* + * Probe dispatcher to regularly dispatch probes to keep NAT alive. + */ +void afs_fs_probe_dispatcher(struct work_struct *work) +{ + struct afs_net *net = container_of(work, struct afs_net, fs_prober); + struct afs_server *fast, *slow, *server; + unsigned long nowj, timer_at, poll_at; + bool first_pass = true, set_timer = false; + + if (!net->live) + return; + + _enter(""); + + if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) { + _leave(" [none]"); + return; + } + +again: + write_seqlock(&net->fs_lock); + + fast = slow = server = NULL; + nowj = jiffies; + timer_at = nowj + MAX_JIFFY_OFFSET; + + if (!list_empty(&net->fs_probe_fast)) { + fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link); + poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval; + if (time_before(nowj, poll_at)) { + timer_at = poll_at; + set_timer = true; + fast = NULL; + } + } + + if (!list_empty(&net->fs_probe_slow)) { + slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link); + poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval; + if (time_before(nowj, poll_at)) { + if (time_before(poll_at, timer_at)) + timer_at = poll_at; + set_timer = true; + slow = NULL; + } + } + + server = fast ?: slow; + if (server) + _debug("probe %pU", &server->uuid); + + if (server && (first_pass || !need_resched())) { + afs_dispatch_fs_probe(net, server, server == fast); + first_pass = false; + goto again; + } + + write_sequnlock(&net->fs_lock); + + if (server) { + if (!queue_work(afs_wq, &net->fs_prober)) + afs_dec_servers_outstanding(net); + _leave(" [requeue]"); + } else if (set_timer) { + if (timer_reduce(&net->fs_probe_timer, timer_at)) + afs_dec_servers_outstanding(net); + _leave(" [timer]"); + } else { + afs_dec_servers_outstanding(net); + _leave(" [quiesce]"); + } +} + +/* + * Wait for a probe on a particular fileserver to complete for 2s. + */ +int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr) +{ + struct wait_queue_entry wait; + unsigned long timo = 2 * HZ; + + if (atomic_read(&server->probe_outstanding) == 0) + goto dont_wait; + + init_wait_entry(&wait, 0); + for (;;) { + prepare_to_wait_event(&server->probe_wq, &wait, + is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); + if (timo == 0 || + server->probe.responded || + atomic_read(&server->probe_outstanding) == 0 || + (is_intr && signal_pending(current))) + break; + timo = schedule_timeout(timo); + } + + finish_wait(&server->probe_wq, &wait); + +dont_wait: + if (server->probe.responded) + return 0; + if (is_intr && signal_pending(current)) + return -ERESTARTSYS; + if (timo == 0) + return -ETIME; + return -EDESTADDRREQ; +} + +/* + * Clean up the probing when the namespace is killed off. + */ +void afs_fs_probe_cleanup(struct afs_net *net) +{ + if (del_timer_sync(&net->fs_probe_timer)) + afs_dec_servers_outstanding(net); +} diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1f9c5d8e6fe5..7d37f63ef0f0 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -10,15 +10,10 @@ #include <linux/sched.h> #include <linux/circ_buf.h> #include <linux/iversion.h> +#include <linux/netfs.h> #include "internal.h" #include "afs_fs.h" #include "xdr_fs.h" -#include "protocol_yfs.h" - -static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) -{ - call->cbi = afs_get_cb_interest(cbi); -} /* * decode an AFSFid block @@ -56,9 +51,9 @@ static void xdr_dump_bad(const __be32 *bp) /* * decode an AFSFetchStatus block */ -static int xdr_decode_AFSFetchStatus(const __be32 **_bp, - struct afs_call *call, - struct afs_status_cb *scb) +static void xdr_decode_AFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; @@ -78,7 +73,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, */ status->abort_code = abort_code; scb->have_error = true; - return 0; + goto advance; } pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); @@ -87,7 +82,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, if (abort_code != 0 && inline_error) { status->abort_code = abort_code; - return 0; + scb->have_error = true; + goto advance; } type = ntohl(xdr->type); @@ -123,18 +119,19 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp, data_version |= (u64)ntohl(xdr->data_version_hi) << 32; status->data_version = data_version; scb->have_status = true; - +advance: *_bp = (const void *)*_bp + sizeof(*xdr); - return 0; + return; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(call, afs_eproto_bad_status); + goto advance; } static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) { - return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry; + return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry; } static void xdr_decode_AFSCallBack(const __be32 **_bp, @@ -238,8 +235,10 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) +static int afs_deliver_fs_fetch_status(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; const __be32 *bp; int ret; @@ -249,11 +248,9 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSCallBack(&bp, call, call->out_scb); - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -262,54 +259,39 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call) /* * FS.FetchStatus operation type */ -static const struct afs_call_type afs_RXFSFetchStatus_vnode = { - .name = "FS.FetchStatus(vnode)", +static const struct afs_call_type afs_RXFSFetchStatus = { + .name = "FS.FetchStatus", .op = afs_FS_FetchStatus, - .deliver = afs_deliver_fs_fetch_status_vnode, + .deliver = afs_deliver_fs_fetch_status, .destructor = afs_flat_call_destructor, }; /* * fetch the status information for a file */ -int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb, - struct afs_volsync *volsync) +void afs_fs_fetch_status(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_file_status(fc, scb, volsync); - _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode, + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = volsync; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHSTATUS); - bp[1] = htonl(vnode->fid.vid); - bp[2] = htonl(vnode->fid.vnode); - bp[3] = htonl(vnode->fid.unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -317,19 +299,19 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb */ static int afs_deliver_fs_fetch_data(struct afs_call *call) { - struct afs_read *req = call->read_request; + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; const __be32 *bp; - unsigned int size; int ret; - _enter("{%u,%zu/%llu}", - call->unmarshall, iov_iter_count(call->iter), req->actual_len); + _enter("{%u,%zu,%zu/%llu}", + call->unmarshall, call->iov_len, iov_iter_count(call->iter), + req->actual_len); switch (call->unmarshall) { case 0: req->actual_len = 0; - req->index = 0; - req->offset = req->pos & (PAGE_SIZE - 1); call->unmarshall++; if (call->operation_ID == FSFETCHDATA64) { afs_extract_to_tmp64(call); @@ -337,9 +319,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->tmp_u = htonl(0); afs_extract_to_tmp(call); } - /* Fall through */ + fallthrough; - /* extract the returned data length */ + /* Extract the returned data length into + * ->actual_len. This may indicate more or less data than was + * requested will be returned. + */ case 1: _debug("extract data length"); ret = afs_extract_data(call, true); @@ -348,53 +333,32 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->actual_len = be64_to_cpu(call->tmp64); _debug("DATA length: %llu", req->actual_len); - req->remain = min(req->len, req->actual_len); - if (req->remain == 0) + + if (req->actual_len == 0) goto no_more_data; + call->iter = req->iter; + call->iov_len = min(req->actual_len, req->len); call->unmarshall++; - - begin_page: - ASSERTCMP(req->index, <, req->nr_pages); - if (req->remain > PAGE_SIZE - req->offset) - size = PAGE_SIZE - req->offset; - else - size = req->remain; - call->bvec[0].bv_len = size; - call->bvec[0].bv_offset = req->offset; - call->bvec[0].bv_page = req->pages[req->index]; - iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size); - ASSERTCMP(size, <=, PAGE_SIZE); - /* Fall through */ + fallthrough; /* extract the returned data */ case 2: _debug("extract data %zu/%llu", - iov_iter_count(call->iter), req->remain); + iov_iter_count(call->iter), req->actual_len); ret = afs_extract_data(call, true); if (ret < 0) return ret; - req->remain -= call->bvec[0].bv_len; - req->offset += call->bvec[0].bv_len; - ASSERTCMP(req->offset, <=, PAGE_SIZE); - if (req->offset == PAGE_SIZE) { - req->offset = 0; - if (req->page_done) - req->page_done(req); - req->index++; - if (req->remain > 0) - goto begin_page; - } - ASSERTCMP(req->remain, ==, 0); + call->iter = &call->def_iter; if (req->actual_len <= req->len) goto no_more_data; /* Discard any excess data the server gave us */ afs_extract_discard(call, req->actual_len - req->len); call->unmarshall = 3; - /* Fall through */ + fallthrough; case 3: _debug("extract discard %zu/%llu", @@ -407,7 +371,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) no_more_data: call->unmarshall = 4; afs_extract_to_buf(call, (21 + 3 + 6) * 4); - /* Fall through */ + fallthrough; /* extract the metadata */ case 4: @@ -416,42 +380,24 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSCallBack(&bp, call, call->out_scb); - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); - req->data_version = call->out_scb->status.data_version; - req->file_size = call->out_scb->status.size; + req->data_version = vp->scb.status.data_version; + req->file_size = vp->scb.status.size; call->unmarshall++; + fallthrough; case 5: break; } - for (; req->index < req->nr_pages; req->index++) { - if (req->offset < PAGE_SIZE) - zero_user_segment(req->pages[req->index], - req->offset, PAGE_SIZE); - if (req->page_done) - req->page_done(req); - req->offset = 0; - } - _leave(" = 0 [done]"); return 0; } -static void afs_fetch_data_destructor(struct afs_call *call) -{ - struct afs_read *req = call->read_request; - - afs_put_read(req); - afs_flat_call_destructor(call); -} - /* * FS.FetchData operation type */ @@ -459,102 +405,79 @@ static const struct afs_call_type afs_RXFSFetchData = { .name = "FS.FetchData", .op = afs_FS_FetchData, .deliver = afs_deliver_fs_fetch_data, - .destructor = afs_fetch_data_destructor, + .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSFetchData64 = { .name = "FS.FetchData64", .op = afs_FS_FetchData64, .deliver = afs_deliver_fs_fetch_data, - .destructor = afs_fetch_data_destructor, + .destructor = afs_flat_call_destructor, }; /* * fetch data from a very large file */ -static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, - struct afs_status_cb *scb, - struct afs_read *req) +static void afs_fs_fetch_data64(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData64, 32, (21 + 3 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = NULL; - call->read_request = afs_get_read(req); + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHDATA64); - bp[1] = htonl(vnode->fid.vid); - bp[2] = htonl(vnode->fid.vnode); - bp[3] = htonl(vnode->fid.unique); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); bp[4] = htonl(upper_32_bits(req->pos)); bp[5] = htonl(lower_32_bits(req->pos)); bp[6] = 0; bp[7] = htonl(lower_32_bits(req->len)); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * fetch data from a file */ -int afs_fs_fetch_data(struct afs_fs_cursor *fc, - struct afs_status_cb *scb, - struct afs_read *req) +void afs_fs_fetch_data(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_read *req = op->fetch.req; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_data(fc, scb, req); - - if (upper_32_bits(req->pos) || - upper_32_bits(req->len) || - upper_32_bits(req->pos + req->len)) - return afs_fs_fetch_data64(fc, scb, req); + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) + return afs_fs_fetch_data64(op); _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchData, 24, (21 + 3 + 6) * 4); if (!call) - return -ENOMEM; + return afs_op_nomem(op); - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = NULL; - call->read_request = afs_get_read(req); + req->call_debug_id = call->debug_id; /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHDATA); - bp[1] = htonl(vnode->fid.vid); - bp[2] = htonl(vnode->fid.vnode); - bp[3] = htonl(vnode->fid.unique); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); bp[4] = htonl(lower_32_bits(req->pos)); bp[5] = htonl(lower_32_bits(req->len)); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -562,6 +485,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_create_vnode(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -571,15 +497,11 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->out_fid); - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_AFSCallBack(&bp, call, call->out_scb); - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFid(&bp, &op->file[1].fid); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSCallBack(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -595,6 +517,52 @@ static const struct afs_call_type afs_RXFSCreateFile = { .destructor = afs_flat_call_destructor, }; +/* + * Create a file. + */ +void afs_fs_create_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz + (6 * 4); + + call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSCREATEFILE); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ + *bp++ = 0; /* owner */ + *bp++ = 0; /* group */ + *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ + *bp++ = 0; /* segment size */ + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + static const struct afs_call_type afs_RXFSMakeDir = { .name = "FS.MakeDir", .op = afs_FS_MakeDir, @@ -603,80 +571,58 @@ static const struct afs_call_type afs_RXFSMakeDir = { }; /* - * create a file or make a directory + * Create a new directory */ -int afs_fs_create(struct afs_fs_cursor *fc, - const char *name, - umode_t mode, - struct afs_status_cb *dvnode_scb, - struct afs_fid *newfid, - struct afs_status_cb *new_scb) +void afs_fs_make_dir(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){ - if (S_ISDIR(mode)) - return yfs_fs_make_dir(fc, name, mode, dvnode_scb, - newfid, new_scb); - else - return yfs_fs_create_file(fc, name, mode, dvnode_scb, - newfid, new_scb); - } - _enter(""); - namesz = strlen(name); + namesz = name->len; padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (6 * 4); - call = afs_alloc_flat_call( - net, S_ISDIR(mode) ? &afs_RXFSMakeDir : &afs_RXFSCreateFile, - reqsz, (3 + 21 + 21 + 3 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSMakeDir, + reqsz, (3 + 21 + 21 + 3 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_fid = newfid; - call->out_scb = new_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; - *bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE); - *bp++ = htonl(dvnode->fid.vid); - *bp++ = htonl(dvnode->fid.vnode); - *bp++ = htonl(dvnode->fid.unique); + *bp++ = htonl(FSMAKEDIR); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); *bp++ = htonl(namesz); - memcpy(bp, name, namesz); + memcpy(bp, name->name, namesz); bp = (void *) bp + namesz; if (padsz > 0) { memset(bp, 0, padsz); bp = (void *) bp + padsz; } *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); - *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ - *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ + *bp++ = htonl(op->create.mode & S_IALLUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* - * Deliver reply data to any operation that returns directory status and volume - * sync. + * Deliver reply data to any operation that returns status and volume sync. */ -static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call) +static int afs_deliver_fs_file_status_and_vol(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; const __be32 *bp; int ret; @@ -686,81 +632,108 @@ static int afs_deliver_fs_dir_status_and_vol(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; } /* - * FS.RemoveDir/FS.RemoveFile operation type + * FS.RemoveFile operation type */ static const struct afs_call_type afs_RXFSRemoveFile = { .name = "FS.RemoveFile", .op = afs_FS_RemoveFile, - .deliver = afs_deliver_fs_dir_status_and_vol, + .deliver = afs_deliver_fs_file_status_and_vol, .destructor = afs_flat_call_destructor, }; +/* + * Remove a file. + */ +void afs_fs_remove_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + size_t namesz, reqsz, padsz; + __be32 *bp; + + _enter(""); + + namesz = name->len; + padsz = (4 - (namesz & 3)) & 3; + reqsz = (5 * 4) + namesz + padsz; + + call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveFile, + reqsz, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(FSREMOVEFILE); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(namesz); + memcpy(bp, name->name, namesz); + bp = (void *) bp + namesz; + if (padsz > 0) { + memset(bp, 0, padsz); + bp = (void *) bp + padsz; + } + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + static const struct afs_call_type afs_RXFSRemoveDir = { .name = "FS.RemoveDir", .op = afs_FS_RemoveDir, - .deliver = afs_deliver_fs_dir_status_and_vol, + .deliver = afs_deliver_fs_file_status_and_vol, .destructor = afs_flat_call_destructor, }; /* - * remove a file or directory + * Remove a directory. */ -int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, bool isdir, struct afs_status_cb *dvnode_scb) +void afs_fs_remove_dir(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_remove(fc, vnode, name, isdir, dvnode_scb); - _enter(""); - namesz = strlen(name); + namesz = name->len; padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz; - call = afs_alloc_flat_call( - net, isdir ? &afs_RXFSRemoveDir : &afs_RXFSRemoveFile, - reqsz, (21 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSRemoveDir, + reqsz, (21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; - *bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE); - *bp++ = htonl(dvnode->fid.vid); - *bp++ = htonl(dvnode->fid.vnode); - *bp++ = htonl(dvnode->fid.unique); + *bp++ = htonl(FSREMOVEDIR); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); *bp++ = htonl(namesz); - memcpy(bp, name, namesz); + memcpy(bp, name->name, namesz); bp = (void *) bp + namesz; if (padsz > 0) { memset(bp, 0, padsz); bp = (void *) bp + padsz; } - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -768,6 +741,9 @@ int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int afs_deliver_fs_link(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -779,13 +755,9 @@ static int afs_deliver_fs_link(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -804,56 +776,44 @@ static const struct afs_call_type afs_RXFSLink = { /* * make a hard link */ -int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, - struct afs_status_cb *dvnode_scb, - struct afs_status_cb *vnode_scb) +void afs_fs_link(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); size_t namesz, reqsz, padsz; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_link(fc, vnode, name, dvnode_scb, vnode_scb); - _enter(""); - namesz = strlen(name); + namesz = name->len; padsz = (4 - (namesz & 3)) & 3; reqsz = (5 * 4) + namesz + padsz + (3 * 4); - call = afs_alloc_flat_call(net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSLink, reqsz, (21 + 21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_scb = vnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSLINK); - *bp++ = htonl(dvnode->fid.vid); - *bp++ = htonl(dvnode->fid.vnode); - *bp++ = htonl(dvnode->fid.unique); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); *bp++ = htonl(namesz); - memcpy(bp, name, namesz); + memcpy(bp, name->name, namesz); bp = (void *) bp + namesz; if (padsz > 0) { memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call1(call, &vp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -861,6 +821,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int afs_deliver_fs_symlink(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -872,14 +835,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSFid(&bp, call->out_fid); - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFid(&bp, &vp->fid); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -898,75 +857,58 @@ static const struct afs_call_type afs_RXFSSymlink = { /* * create a symbolic link */ -int afs_fs_symlink(struct afs_fs_cursor *fc, - const char *name, - const char *contents, - struct afs_status_cb *dvnode_scb, - struct afs_fid *newfid, - struct afs_status_cb *new_scb) +void afs_fs_symlink(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); size_t namesz, reqsz, padsz, c_namesz, c_padsz; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_symlink(fc, name, contents, dvnode_scb, - newfid, new_scb); - _enter(""); - namesz = strlen(name); + namesz = name->len; padsz = (4 - (namesz & 3)) & 3; - c_namesz = strlen(contents); + c_namesz = strlen(op->create.symlink); c_padsz = (4 - (c_namesz & 3)) & 3; reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4); - call = afs_alloc_flat_call(net, &afs_RXFSSymlink, reqsz, + call = afs_alloc_flat_call(op->net, &afs_RXFSSymlink, reqsz, (3 + 21 + 21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_fid = newfid; - call->out_scb = new_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSYMLINK); - *bp++ = htonl(dvnode->fid.vid); - *bp++ = htonl(dvnode->fid.vnode); - *bp++ = htonl(dvnode->fid.unique); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); *bp++ = htonl(namesz); - memcpy(bp, name, namesz); + memcpy(bp, name->name, namesz); bp = (void *) bp + namesz; if (padsz > 0) { memset(bp, 0, padsz); bp = (void *) bp + padsz; } *bp++ = htonl(c_namesz); - memcpy(bp, contents, c_namesz); + memcpy(bp, op->create.symlink, c_namesz); bp = (void *) bp + c_namesz; if (c_padsz > 0) { memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); - *bp++ = htonl(dvnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ *bp++ = 0; /* segment size */ - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -974,6 +916,9 @@ int afs_fs_symlink(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_rename(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; const __be32 *bp; int ret; @@ -981,17 +926,13 @@ static int afs_deliver_fs_rename(struct afs_call *call) if (ret < 0) return ret; - /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } - xdr_decode_AFSVolSync(&bp, call->out_volsync); + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ + xdr_decode_AFSFetchStatus(&bp, call, &orig_dvp->scb); + xdr_decode_AFSFetchStatus(&bp, call, &new_dvp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -1010,31 +951,22 @@ static const struct afs_call_type afs_RXFSRename = { /* * Rename/move a file or directory. */ -int afs_fs_rename(struct afs_fs_cursor *fc, - const char *orig_name, - struct afs_vnode *new_dvnode, - const char *new_name, - struct afs_status_cb *orig_dvnode_scb, - struct afs_status_cb *new_dvnode_scb) +void afs_fs_rename(struct afs_operation *op) { - struct afs_vnode *orig_dvnode = fc->vnode; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const struct qstr *orig_name = &op->dentry->d_name; + const struct qstr *new_name = &op->dentry_2->d_name; struct afs_call *call; - struct afs_net *net = afs_v2net(orig_dvnode); size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_rename(fc, orig_name, - new_dvnode, new_name, - orig_dvnode_scb, - new_dvnode_scb); - _enter(""); - o_namesz = strlen(orig_name); + o_namesz = orig_name->len; o_padsz = (4 - (o_namesz & 3)) & 3; - n_namesz = strlen(new_name); + n_namesz = new_name->len; n_padsz = (4 - (n_namesz & 3)) & 3; reqsz = (4 * 4) + @@ -1042,51 +974,46 @@ int afs_fs_rename(struct afs_fs_cursor *fc, (3 * 4) + 4 + n_namesz + n_padsz; - call = afs_alloc_flat_call(net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSRename, reqsz, (21 + 21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = orig_dvnode_scb; - call->out_scb = new_dvnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSRENAME); - *bp++ = htonl(orig_dvnode->fid.vid); - *bp++ = htonl(orig_dvnode->fid.vnode); - *bp++ = htonl(orig_dvnode->fid.unique); + *bp++ = htonl(orig_dvp->fid.vid); + *bp++ = htonl(orig_dvp->fid.vnode); + *bp++ = htonl(orig_dvp->fid.unique); *bp++ = htonl(o_namesz); - memcpy(bp, orig_name, o_namesz); + memcpy(bp, orig_name->name, o_namesz); bp = (void *) bp + o_namesz; if (o_padsz > 0) { memset(bp, 0, o_padsz); bp = (void *) bp + o_padsz; } - *bp++ = htonl(new_dvnode->fid.vid); - *bp++ = htonl(new_dvnode->fid.vnode); - *bp++ = htonl(new_dvnode->fid.unique); + *bp++ = htonl(new_dvp->fid.vid); + *bp++ = htonl(new_dvp->fid.vnode); + *bp++ = htonl(new_dvp->fid.unique); *bp++ = htonl(n_namesz); - memcpy(bp, new_name, n_namesz); + memcpy(bp, new_name->name, n_namesz); bp = (void *) bp + n_namesz; if (n_padsz > 0) { memset(bp, 0, n_padsz); bp = (void *) bp + n_padsz; } - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); + afs_make_op_call(op, call, GFP_NOFS); } /* - * deliver reply data to an FS.StoreData + * Deliver reply data to FS.StoreData or FS.StoreStatus */ static int afs_deliver_fs_store_data(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; const __be32 *bp; int ret; @@ -1098,10 +1025,8 @@ static int afs_deliver_fs_store_data(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -1127,164 +1052,96 @@ static const struct afs_call_type afs_RXFSStoreData64 = { /* * store a set of pages to a very large file */ -static int afs_fs_store_data64(struct afs_fs_cursor *fc, - struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - loff_t size, loff_t pos, loff_t i_size, - struct afs_status_cb *scb) +static void afs_fs_store_data64(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSStoreData64, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) - return -ENOMEM; + return afs_op_nomem(op); - call->key = fc->key; - call->mapping = mapping; - call->first = first; - call->last = last; - call->first_offset = offset; - call->last_to = to; - call->send_pages = true; - call->out_scb = scb; + call->write_iter = op->store.write_iter; /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSTOREDATA64); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); *bp++ = htonl(AFS_SET_MTIME); /* mask */ - *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ *bp++ = 0; /* segment size */ - *bp++ = htonl(pos >> 32); - *bp++ = htonl((u32) pos); - *bp++ = htonl(size >> 32); - *bp++ = htonl((u32) size); - *bp++ = htonl(i_size >> 32); - *bp++ = htonl((u32) i_size); - - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(upper_32_bits(op->store.pos)); + *bp++ = htonl(lower_32_bits(op->store.pos)); + *bp++ = htonl(upper_32_bits(op->store.size)); + *bp++ = htonl(lower_32_bits(op->store.size)); + *bp++ = htonl(upper_32_bits(op->store.i_size)); + *bp++ = htonl(lower_32_bits(op->store.i_size)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* - * store a set of pages + * Write data to a file on the server. */ -int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - struct afs_status_cb *scb) +void afs_fs_store_data(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); - loff_t size, pos, i_size; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_store_data(fc, mapping, first, last, offset, to, scb); - _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - - size = (loff_t)to - (loff_t)offset; - if (first != last) - size += (loff_t)(last - first) << PAGE_SHIFT; - pos = (loff_t)first << PAGE_SHIFT; - pos += offset; - - i_size = i_size_read(&vnode->vfs_inode); - if (pos + size > i_size) - i_size = size + pos; + key_serial(op->key), vp->fid.vid, vp->fid.vnode); _debug("size %llx, at %llx, i_size %llx", - (unsigned long long) size, (unsigned long long) pos, - (unsigned long long) i_size); + (unsigned long long)op->store.size, + (unsigned long long)op->store.pos, + (unsigned long long)op->store.i_size); - if (pos >> 32 || i_size >> 32 || size >> 32 || (pos + size) >> 32) - return afs_fs_store_data64(fc, mapping, first, last, offset, to, - size, pos, i_size, scb); + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) + return afs_fs_store_data64(op); - call = afs_alloc_flat_call(net, &afs_RXFSStoreData, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) - return -ENOMEM; + return afs_op_nomem(op); - call->key = fc->key; - call->mapping = mapping; - call->first = first; - call->last = last; - call->first_offset = offset; - call->last_to = to; - call->send_pages = true; - call->out_scb = scb; + call->write_iter = op->store.write_iter; /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSTOREDATA); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); *bp++ = htonl(AFS_SET_MTIME); /* mask */ - *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ + *bp++ = htonl(op->mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ *bp++ = 0; /* segment size */ - *bp++ = htonl(pos); - *bp++ = htonl(size); - *bp++ = htonl(i_size); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); -} - -/* - * deliver reply data to an FS.StoreStatus - */ -static int afs_deliver_fs_store_status(struct afs_call *call) -{ - const __be32 *bp; - int ret; - - _enter(""); - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; + *bp++ = htonl(lower_32_bits(op->store.pos)); + *bp++ = htonl(lower_32_bits(op->store.size)); + *bp++ = htonl(lower_32_bits(op->store.i_size)); - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); - - _leave(" = 0 [done]"); - return 0; + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1293,21 +1150,21 @@ static int afs_deliver_fs_store_status(struct afs_call *call) static const struct afs_call_type afs_RXFSStoreStatus = { .name = "FS.StoreStatus", .op = afs_FS_StoreStatus, - .deliver = afs_deliver_fs_store_status, + .deliver = afs_deliver_fs_store_data, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData_as_Status = { .name = "FS.StoreData", .op = afs_FS_StoreData, - .deliver = afs_deliver_fs_store_status, + .deliver = afs_deliver_fs_store_data, .destructor = afs_flat_call_destructor, }; static const struct afs_call_type afs_RXFSStoreData64_as_Status = { .name = "FS.StoreData64", .op = afs_FS_StoreData64, - .deliver = afs_deliver_fs_store_status, + .deliver = afs_deliver_fs_store_data, .destructor = afs_flat_call_destructor, }; @@ -1315,85 +1172,74 @@ static const struct afs_call_type afs_RXFSStoreData64_as_Status = { * set the attributes on a very large file, using FS.StoreData rather than * FS.StoreStatus so as to alter the file size also */ -static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr, - struct afs_status_cb *scb) +static void afs_fs_setattr_size64(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct iattr *attr = op->setattr.attr; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); - call = afs_alloc_flat_call(net, &afs_RXFSStoreData64_as_Status, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData64_as_Status, (4 + 6 + 3 * 2) * 4, (21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSTOREDATA64); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */ - *bp++ = htonl((u32) attr->ia_size); - *bp++ = 0; /* size of write */ + *bp++ = htonl(upper_32_bits(attr->ia_size)); /* position of start of write */ + *bp++ = htonl(lower_32_bits(attr->ia_size)); + *bp++ = 0; /* size of write */ *bp++ = 0; - *bp++ = htonl(attr->ia_size >> 32); /* new file length */ - *bp++ = htonl((u32) attr->ia_size); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(upper_32_bits(attr->ia_size)); /* new file length */ + *bp++ = htonl(lower_32_bits(attr->ia_size)); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus * so as to alter the file size also */ -static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, - struct afs_status_cb *scb) +static void afs_fs_setattr_size(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct iattr *attr = op->setattr.attr; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); ASSERT(attr->ia_valid & ATTR_SIZE); - if (attr->ia_size >> 32) - return afs_fs_setattr_size64(fc, attr, scb); + if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags)) + return afs_fs_setattr_size64(op); - call = afs_alloc_flat_call(net, &afs_RXFSStoreData_as_Status, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData_as_Status, (4 + 6 + 3) * 4, (21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSTOREDATA); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); xdr_encode_AFS_StoreStatus(&bp, attr); @@ -1401,57 +1247,44 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * set the attributes on a file, using FS.StoreData if there's a change in file * size, and FS.StoreStatus otherwise */ -int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, - struct afs_status_cb *scb) +void afs_fs_setattr(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct iattr *attr = op->setattr.attr; __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_setattr(fc, attr, scb); - if (attr->ia_valid & ATTR_SIZE) - return afs_fs_setattr_size(fc, attr, scb); + return afs_fs_setattr_size(op); _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreStatus, (4 + 6) * 4, (21 + 6) * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSTORESTATUS); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); - xdr_encode_AFS_StoreStatus(&bp, attr); + xdr_encode_AFS_StoreStatus(&bp, op->setattr.attr); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1459,6 +1292,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, */ static int afs_deliver_fs_get_volume_status(struct afs_call *call) { + struct afs_operation *op = call->op; const __be32 *bp; char *p; u32 size; @@ -1470,7 +1304,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 0: call->unmarshall++; afs_extract_to_buf(call, 12 * 4); - /* Fall through */ + fallthrough; /* extract the returned status record */ case 1: @@ -1480,10 +1314,10 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus); + xdr_decode_AFSFetchVolumeStatus(&bp, &op->volstatus.vs); call->unmarshall++; afs_extract_to_tmp(call); - /* Fall through */ + fallthrough; /* extract the volume name length */ case 2: @@ -1494,12 +1328,11 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_volname_len); + return afs_protocol_error(call, afs_eproto_volname_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the volume name */ case 3: @@ -1513,7 +1346,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) _debug("volname '%s'", p); afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the offline message length */ case 4: @@ -1524,12 +1357,11 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_offline_msg_len); + return afs_protocol_error(call, afs_eproto_offline_msg_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the offline message */ case 5: @@ -1544,7 +1376,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the message of the day length */ case 6: @@ -1555,12 +1387,11 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_motd_len); + return afs_protocol_error(call, afs_eproto_motd_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the message of the day */ case 7: @@ -1574,6 +1405,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) _debug("motd '%s'", p); call->unmarshall++; + fallthrough; case 8: break; @@ -1596,37 +1428,26 @@ static const struct afs_call_type afs_RXFSGetVolumeStatus = { /* * fetch the status of a volume */ -int afs_fs_get_volume_status(struct afs_fs_cursor *fc, - struct afs_volume_status *vs) +void afs_fs_get_volume_status(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_get_volume_status(fc, vs); - _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSGetVolumeStatus, 2 * 4, + call = afs_alloc_flat_call(op->net, &afs_RXFSGetVolumeStatus, 2 * 4, max(12 * 4, AFSOPAQUEMAX + 1)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_volstatus = vs; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSGETVOLUMESTATUS); - bp[1] = htonl(vnode->fid.vid); + bp[1] = htonl(vp->fid.vid); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1634,6 +1455,7 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) { + struct afs_operation *op = call->op; const __be32 *bp; int ret; @@ -1645,7 +1467,7 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -1686,114 +1508,80 @@ static const struct afs_call_type afs_RXFSReleaseLock = { /* * Set a lock on a file */ -int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type, - struct afs_status_cb *scb) +void afs_fs_set_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_set_lock(fc, type, scb); - _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSSetLock, 5 * 4, 6 * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSSETLOCK); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); - *bp++ = htonl(type); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_calli(call, &vnode->fid, type); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + *bp++ = htonl(op->lock.type); + + trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); + afs_make_op_call(op, call, GFP_NOFS); } /* * extend a lock on a file */ -int afs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) +void afs_fs_extend_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_extend_lock(fc, scb); - _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSExtendLock, 4 * 4, 6 * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSEXTENDLOCK); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * release a lock on a file */ -int afs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) +void afs_fs_release_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_release_lock(fc, scb); - _enter(""); - call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); + call = afs_alloc_flat_call(op->net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSRELEASELOCK); - *bp++ = htonl(vnode->fid.vid); - *bp++ = htonl(vnode->fid.vnode); - *bp++ = htonl(vnode->fid.unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1837,7 +1625,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net, bp = call->request; *bp++ = htonl(FSGIVEUPALLCALLBACKS); - /* Can't take a ref on server */ + call->server = afs_use_server(server, afs_server_trace_give_up_cb); afs_make_call(ac, call, GFP_NOFS); return afs_wait_for_call_to_complete(call, ac); } @@ -1856,7 +1644,7 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the capabilities word count */ case 1: @@ -1865,20 +1653,33 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) return ret; count = ntohl(call->tmp); - call->count = count; call->count2 = count; - afs_extract_discard(call, count * sizeof(__be32)); + if (count == 0) { + call->unmarshall = 4; + call->tmp = 0; + break; + } + + /* Extract the first word of the capabilities to call->tmp */ + afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; - /* Extract capabilities words */ case 2: ret = afs_extract_data(call, false); if (ret < 0) return ret; - /* TODO: Examine capabilities */ + afs_extract_discard(call, (count - 1) * sizeof(__be32)); + call->unmarshall++; + fallthrough; + + /* Extract remaining capabilities words */ + case 3: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; call->unmarshall++; break; @@ -1900,14 +1701,13 @@ static const struct afs_call_type afs_RXFSGetCapabilities = { }; /* - * Probe a fileserver for the capabilities that it supports. This can - * return up to 196 words. + * Probe a fileserver for the capabilities that it supports. This RPC can + * reply with up to 196 words. The operation is asynchronous and if we managed + * to allocate a call, true is returned the result is delivered through the + * ->done() - otherwise we return false to indicate we didn't even try. */ -struct afs_call *afs_fs_get_capabilities(struct afs_net *net, - struct afs_server *server, - struct afs_addr_cursor *ac, - struct key *key, - unsigned int server_index) +bool afs_fs_get_capabilities(struct afs_net *net, struct afs_server *server, + struct afs_addr_cursor *ac, struct key *key) { struct afs_call *call; __be32 *bp; @@ -1916,11 +1716,10 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, call = afs_alloc_flat_call(net, &afs_RXFSGetCapabilities, 1 * 4, 16 * 4); if (!call) - return ERR_PTR(-ENOMEM); + return false; call->key = key; - call->server = afs_get_server(server, afs_server_trace_get_caps); - call->server_index = server_index; + call->server = afs_use_server(server, afs_server_trace_get_caps); call->upgrade = true; call->async = true; call->max_lifespan = AFS_PROBE_MAX_LIFESPAN; @@ -1929,87 +1728,10 @@ struct afs_call *afs_fs_get_capabilities(struct afs_net *net, bp = call->request; *bp++ = htonl(FSGETCAPABILITIES); - /* Can't take a ref on server */ trace_afs_make_fs_call(call, NULL); afs_make_call(ac, call, GFP_NOFS); - return call; -} - -/* - * Deliver reply data to an FS.FetchStatus with no vnode. - */ -static int afs_deliver_fs_fetch_status(struct afs_call *call) -{ - const __be32 *bp; - int ret; - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSCallBack(&bp, call, call->out_scb); - xdr_decode_AFSVolSync(&bp, call->out_volsync); - - _leave(" = 0 [done]"); - return 0; -} - -/* - * FS.FetchStatus operation type - */ -static const struct afs_call_type afs_RXFSFetchStatus = { - .name = "FS.FetchStatus", - .op = afs_FS_FetchStatus, - .deliver = afs_deliver_fs_fetch_status, - .destructor = afs_flat_call_destructor, -}; - -/* - * Fetch the status information for a fid without needing a vnode handle. - */ -int afs_fs_fetch_status(struct afs_fs_cursor *fc, - struct afs_net *net, - struct afs_fid *fid, - struct afs_status_cb *scb, - struct afs_volsync *volsync) -{ - struct afs_call *call; - __be32 *bp; - - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_fetch_status(fc, net, fid, scb, volsync); - - _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), fid->vid, fid->vnode); - - call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_fid = fid; - call->out_scb = scb; - call->out_volsync = volsync; - - /* marshall the parameters */ - bp = call->request; - bp[0] = htonl(FSFETCHSTATUS); - bp[1] = htonl(fid->vid); - bp[2] = htonl(fid->vnode); - bp[3] = htonl(fid->unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + afs_put_call(call); + return true; } /* @@ -2017,6 +1739,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) { + struct afs_operation *op = call->op; struct afs_status_cb *scb; const __be32 *bp; u32 tmp; @@ -2028,7 +1751,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the file status count and array in two steps */ case 1: @@ -2038,16 +1761,15 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; tmp = ntohl(call->tmp); - _debug("status count: %u/%u", tmp, call->count2); - if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_ibulkst_count); + _debug("status count: %u/%u", tmp, op->nr_files); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_count); call->count = 0; call->unmarshall++; more_counts: afs_extract_to_buf(call, 21 * sizeof(__be32)); - /* Fall through */ + fallthrough; case 2: _debug("extract status array %u", call->count); @@ -2055,20 +1777,29 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) if (ret < 0) return ret; + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + bp = call->buffer; - scb = &call->out_scb[call->count]; - ret = xdr_decode_AFSFetchStatus(&bp, call, scb); - if (ret < 0) - return ret; + xdr_decode_AFSFetchStatus(&bp, call, scb); call->count++; - if (call->count < call->count2) + if (call->count < op->nr_files) goto more_counts; call->count = 0; call->unmarshall++; afs_extract_to_tmp(call); - /* Fall through */ + fallthrough; /* Extract the callback count and array in two steps */ case 3: @@ -2079,14 +1810,13 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); - if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_ibulkst_cb_count); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_cb_count); call->count = 0; call->unmarshall++; more_cbs: afs_extract_to_buf(call, 3 * sizeof(__be32)); - /* Fall through */ + fallthrough; case 4: _debug("extract CB array"); @@ -2095,16 +1825,27 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; _debug("unmarshall CB array"); + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + bp = call->buffer; - scb = &call->out_scb[call->count]; xdr_decode_AFSCallBack(&bp, call, scb); call->count++; - if (call->count < call->count2) + if (call->count < op->nr_files) goto more_cbs; afs_extract_to_buf(call, 6 * sizeof(__be32)); call->unmarshall++; - /* Fall through */ + fallthrough; case 5: ret = afs_extract_data(call, false); @@ -2112,9 +1853,10 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSVolSync(&bp, &op->volsync); call->unmarshall++; + fallthrough; case 6: break; @@ -2124,6 +1866,16 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) return 0; } +static void afs_done_fs_inline_bulk_status(struct afs_call *call) +{ + if (call->error == -ECONNABORTED && + call->abort_code == RX_INVALID_OPERATION) { + set_bit(AFS_SERVER_FL_NO_IBULK, &call->server->flags); + if (call->op) + set_bit(AFS_VOLUME_MAYBE_NO_IBULK, &call->op->volume->flags); + } +} + /* * FS.InlineBulkStatus operation type */ @@ -2131,58 +1883,53 @@ static const struct afs_call_type afs_RXFSInlineBulkStatus = { .name = "FS.InlineBulkStatus", .op = afs_FS_InlineBulkStatus, .deliver = afs_deliver_fs_inline_bulk_status, + .done = afs_done_fs_inline_bulk_status, .destructor = afs_flat_call_destructor, }; /* * Fetch the status information for up to 50 files */ -int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, - struct afs_net *net, - struct afs_fid *fids, - struct afs_status_cb *statuses, - unsigned int nr_fids, - struct afs_volsync *volsync) +void afs_fs_inline_bulk_status(struct afs_operation *op) { + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; struct afs_call *call; __be32 *bp; int i; - if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) - return yfs_fs_inline_bulk_status(fc, net, fids, statuses, - nr_fids, volsync); + if (test_bit(AFS_SERVER_FL_NO_IBULK, &op->server->flags)) { + op->error = -ENOTSUPP; + return; + } _enter(",%x,{%llx:%llu},%u", - key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files); - call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus, - (2 + nr_fids * 3) * 4, + call = afs_alloc_flat_call(op->net, &afs_RXFSInlineBulkStatus, + (2 + op->nr_files * 3) * 4, 21 * 4); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = statuses; - call->out_volsync = volsync; - call->count2 = nr_fids; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; *bp++ = htonl(FSINLINEBULKSTATUS); - *bp++ = htonl(nr_fids); - for (i = 0; i < nr_fids; i++) { - *bp++ = htonl(fids[i].vid); - *bp++ = htonl(fids[i].vnode); - *bp++ = htonl(fids[i].unique); + *bp++ = htonl(op->nr_files); + *bp++ = htonl(dvp->fid.vid); + *bp++ = htonl(dvp->fid.vnode); + *bp++ = htonl(dvp->fid.unique); + *bp++ = htonl(vp->fid.vid); + *bp++ = htonl(vp->fid.vnode); + *bp++ = htonl(vp->fid.unique); + for (i = 0; i < op->nr_files - 2; i++) { + *bp++ = htonl(op->more_files[i].fid.vid); + *bp++ = htonl(op->more_files[i].fid.vnode); + *bp++ = htonl(op->more_files[i].fid.unique); } - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &fids[0]); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -2190,6 +1937,8 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc, */ static int afs_deliver_fs_fetch_acl(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; struct afs_acl *acl; const __be32 *bp; unsigned int size; @@ -2201,7 +1950,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the returned data length */ case 1: @@ -2215,11 +1964,11 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) acl = kmalloc(struct_size(acl, data, size), GFP_KERNEL); if (!acl) return -ENOMEM; - call->ret_acl = acl; + op->acl = acl; acl->size = call->count2; afs_extract_begin(call, acl->data, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the returned data */ case 2: @@ -2229,7 +1978,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) afs_extract_to_buf(call, (21 + 6) * 4); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the metadata */ case 3: @@ -2238,12 +1987,11 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + xdr_decode_AFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_AFSVolSync(&bp, &op->volsync); call->unmarshall++; + fallthrough; case 4: break; @@ -2253,12 +2001,6 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) return 0; } -static void afs_destroy_fs_fetch_acl(struct afs_call *call) -{ - kfree(call->ret_acl); - afs_flat_call_destructor(call); -} - /* * FS.FetchACL operation type */ @@ -2266,68 +2008,33 @@ static const struct afs_call_type afs_RXFSFetchACL = { .name = "FS.FetchACL", .op = afs_FS_FetchACL, .deliver = afs_deliver_fs_fetch_acl, - .destructor = afs_destroy_fs_fetch_acl, }; /* * Fetch the ACL for a file. */ -struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *fc, - struct afs_status_cb *scb) +void afs_fs_fetch_acl(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &afs_RXFSFetchACL, 16, (21 + 6) * 4); - if (!call) { - fc->ac.error = -ENOMEM; - return ERR_PTR(-ENOMEM); - } - - call->key = fc->key; - call->ret_acl = NULL; - call->out_scb = scb; - call->out_volsync = NULL; + call = afs_alloc_flat_call(op->net, &afs_RXFSFetchACL, 16, (21 + 6) * 4); + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSFETCHACL); - bp[1] = htonl(vnode->fid.vid); - bp[2] = htonl(vnode->fid.vnode); - bp[3] = htonl(vnode->fid.unique); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_make_call(&fc->ac, call, GFP_KERNEL); - return (struct afs_acl *)afs_wait_for_call_to_complete(call, &fc->ac); -} - -/* - * Deliver reply data to any operation that returns file status and volume - * sync. - */ -static int afs_deliver_fs_file_status_and_vol(struct afs_call *call) -{ - const __be32 *bp; - int ret; - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - bp = call->buffer; - ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_AFSVolSync(&bp, call->out_volsync); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); - _leave(" = 0 [done]"); - return 0; + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); } /* @@ -2343,42 +2050,34 @@ static const struct afs_call_type afs_RXFSStoreACL = { /* * Fetch the ACL for a file. */ -int afs_fs_store_acl(struct afs_fs_cursor *fc, const struct afs_acl *acl, - struct afs_status_cb *scb) +void afs_fs_store_acl(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + const struct afs_acl *acl = op->acl; size_t size; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); size = round_up(acl->size, 4); - call = afs_alloc_flat_call(net, &afs_RXFSStoreACL, + call = afs_alloc_flat_call(op->net, &afs_RXFSStoreACL, 5 * 4 + size, (21 + 6) * 4); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = NULL; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp[0] = htonl(FSSTOREACL); - bp[1] = htonl(vnode->fid.vid); - bp[2] = htonl(vnode->fid.vnode); - bp[3] = htonl(vnode->fid.unique); + bp[1] = htonl(vp->fid.vid); + bp[2] = htonl(vp->fid.vnode); + bp[3] = htonl(vp->fid.unique); bp[4] = htonl(acl->size); memcpy(&bp[5], acl->data, acl->size); if (acl->size != size) memset((void *)&bp[5] + acl->size, 0, size - acl->size); - trace_afs_make_fs_call(call, &vnode->fid); - afs_make_call(&fc->ac, call, GFP_KERNEL); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 281470fe1183..6d3a3dbe4928 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -27,7 +27,6 @@ static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, - .listxattr = afs_listxattr, }; static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) @@ -55,28 +54,28 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren } /* - * Set the file size and block count. Estimate the number of 512 bytes blocks - * used, rounded up to nearest 1K for consistency with other AFS clients. + * Set parameters for the netfs library */ -static void afs_set_i_size(struct afs_vnode *vnode, u64 size) +static void afs_set_netfs_context(struct afs_vnode *vnode) { - i_size_write(&vnode->vfs_inode, size); - vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1; + netfs_inode_init(&vnode->netfs, &afs_req_ops); } /* * Initialise an inode from the vnode status. */ -static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, - struct afs_cb_interest *cbi, - struct afs_vnode *parent_vnode, - struct afs_status_cb *scb) +static int afs_inode_init_from_status(struct afs_operation *op, + struct afs_vnode_param *vp, + struct afs_vnode *vnode) { - struct afs_cb_interest *old_cbi = NULL; - struct afs_file_status *status = &scb->status; + struct afs_file_status *status = &vp->scb.status; struct inode *inode = AFS_VNODE_TO_I(vnode); struct timespec64 t; + _enter("{%llx:%llu.%u} %s", + vp->fid.vid, vp->fid.vnode, vp->fid.unique, + op->type ? op->type->name : "???"); + _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", status->type, status->nlink, @@ -86,28 +85,33 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, write_seqlock(&vnode->cb_lock); + vnode->cb_v_break = op->cb_v_break; + vnode->cb_s_break = op->cb_s_break; vnode->status = *status; t = status->mtime_client; inode->i_ctime = t; inode->i_mtime = t; inode->i_atime = t; + inode->i_flags |= S_NOATIME; inode->i_uid = make_kuid(&init_user_ns, status->owner); inode->i_gid = make_kgid(&init_user_ns, status->group); - set_nlink(&vnode->vfs_inode, status->nlink); + set_nlink(&vnode->netfs.inode, status->nlink); switch (status->type) { case AFS_FTYPE_FILE: - inode->i_mode = S_IFREG | status->mode; + inode->i_mode = S_IFREG | (status->mode & S_IALLUGO); inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_file_aops; + mapping_set_large_folios(inode->i_mapping); break; case AFS_FTYPE_DIR: - inode->i_mode = S_IFDIR | status->mode; + inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO); inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; inode->i_mapping->a_ops = &afs_dir_aops; + mapping_set_large_folios(inode->i_mapping); break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. */ @@ -119,57 +123,57 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key, inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_symlink_aops; } else { inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; - inode->i_mapping->a_ops = &afs_fs_aops; + inode->i_mapping->a_ops = &afs_symlink_aops; } inode_nohighmem(inode); break; default: - dump_vnode(vnode, parent_vnode); + dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL); write_sequnlock(&vnode->cb_lock); - return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type); + return afs_protocol_error(NULL, afs_eproto_file_type); } afs_set_i_size(vnode, status->size); + afs_set_netfs_context(vnode); vnode->invalid_before = status->data_version; - inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); + inode_set_iversion_raw(&vnode->netfs.inode, status->data_version); - if (!scb->have_cb) { + if (!vp->scb.have_cb) { /* it's a symlink we just created (the fileserver * didn't give us a callback) */ vnode->cb_expires_at = ktime_get_real_seconds(); } else { - vnode->cb_expires_at = scb->callback.expires_at; - old_cbi = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->cb_lock.lock)); - if (cbi != old_cbi) - rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(cbi)); - else - old_cbi = NULL; + vnode->cb_expires_at = vp->scb.callback.expires_at; + vnode->cb_server = op->server; set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } write_sequnlock(&vnode->cb_lock); - afs_put_cb_interest(afs_v2net(vnode), old_cbi); return 0; } /* * Update the core inode struct from a returned status record. */ -static void afs_apply_status(struct afs_fs_cursor *fc, - struct afs_vnode *vnode, - struct afs_status_cb *scb, - const afs_dataversion_t *expected_version) +static void afs_apply_status(struct afs_operation *op, + struct afs_vnode_param *vp) { - struct afs_file_status *status = &scb->status; + struct afs_file_status *status = &vp->scb.status; + struct afs_vnode *vnode = vp->vnode; + struct inode *inode = &vnode->netfs.inode; struct timespec64 t; umode_t mode; bool data_changed = false; + bool change_size = vp->set_size; + + _enter("{%llx:%llu.%u} %s", + vp->fid.vid, vp->fid.vnode, vp->fid.unique, + op->type ? op->type->name : "???"); BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); @@ -179,44 +183,44 @@ static void afs_apply_status(struct afs_fs_cursor *fc, vnode->fid.vnode, vnode->fid.unique, status->type, vnode->status.type); - afs_protocol_error(NULL, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(NULL, afs_eproto_bad_status); return; } if (status->nlink != vnode->status.nlink) - set_nlink(&vnode->vfs_inode, status->nlink); + set_nlink(inode, status->nlink); if (status->owner != vnode->status.owner) - vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); + inode->i_uid = make_kuid(&init_user_ns, status->owner); if (status->group != vnode->status.group) - vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); + inode->i_gid = make_kgid(&init_user_ns, status->group); if (status->mode != vnode->status.mode) { - mode = vnode->vfs_inode.i_mode; + mode = inode->i_mode; mode &= ~S_IALLUGO; - mode |= status->mode; - WRITE_ONCE(vnode->vfs_inode.i_mode, mode); + mode |= status->mode & S_IALLUGO; + WRITE_ONCE(inode->i_mode, mode); } t = status->mtime_client; - vnode->vfs_inode.i_ctime = t; - vnode->vfs_inode.i_mtime = t; - vnode->vfs_inode.i_atime = t; + inode->i_mtime = t; + if (vp->update_ctime) + inode->i_ctime = op->ctime; if (vnode->status.data_version != status->data_version) data_changed = true; vnode->status = *status; - if (expected_version && - *expected_version != status->data_version) { + if (vp->dv_before + vp->dv_delta != status->data_version) { if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) - pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n", + pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n", vnode->fid.vid, vnode->fid.vnode, - (unsigned long long)*expected_version, + (unsigned long long)vp->dv_before + vp->dv_delta, (unsigned long long)status->data_version, - fc->type ? fc->type->name : "???"); + op->type ? op->type->name : "???", + op->debug_id); vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) { @@ -225,6 +229,7 @@ static void afs_apply_status(struct afs_fs_cursor *fc, } else { set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } + change_size = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a @@ -232,33 +237,38 @@ static void afs_apply_status(struct afs_fs_cursor *fc, */ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) data_changed = false; + change_size = true; } if (data_changed) { - inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - afs_set_i_size(vnode, status->size); + inode_set_iversion_raw(inode, status->data_version); + + /* Only update the size if the data version jumped. If the + * file is being modified locally, then we might have our own + * idea of what the size should be that's not the same as + * what's on the server. + */ + vnode->netfs.remote_i_size = status->size; + if (change_size) { + afs_set_i_size(vnode, status->size); + inode->i_ctime = t; + inode->i_atime = t; + } } } /* * Apply a callback to a vnode. */ -static void afs_apply_callback(struct afs_fs_cursor *fc, - struct afs_vnode *vnode, - struct afs_status_cb *scb, - unsigned int cb_break) +static void afs_apply_callback(struct afs_operation *op, + struct afs_vnode_param *vp) { - struct afs_cb_interest *old; - struct afs_callback *cb = &scb->callback; + struct afs_callback *cb = &vp->scb.callback; + struct afs_vnode *vnode = vp->vnode; - if (!afs_cb_is_broken(cb_break, vnode, fc->cbi)) { + if (!afs_cb_is_broken(vp->cb_break_before, vnode)) { vnode->cb_expires_at = cb->expires_at; - old = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->cb_lock.lock)); - if (old != fc->cbi) { - rcu_assign_pointer(vnode->cb_interest, afs_get_cb_interest(fc->cbi)); - afs_put_cb_interest(afs_v2net(vnode), old); - } + vnode->cb_server = op->server; set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); } } @@ -267,106 +277,125 @@ static void afs_apply_callback(struct afs_fs_cursor *fc, * Apply the received status and callback to an inode all in the same critical * section to avoid races with afs_validate(). */ -void afs_vnode_commit_status(struct afs_fs_cursor *fc, - struct afs_vnode *vnode, - unsigned int cb_break, - const afs_dataversion_t *expected_version, - struct afs_status_cb *scb) +void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp) { - if (fc->ac.error != 0) - return; + struct afs_vnode *vnode = vp->vnode; + + _enter(""); write_seqlock(&vnode->cb_lock); - if (scb->have_error) { - if (scb->status.abort_code == VNOVNODE) { + if (vp->scb.have_error) { + /* A YFS server will return this from RemoveFile2 and AFS and + * YFS will return this from InlineBulkStatus. + */ + if (vp->scb.status.abort_code == VNOVNODE) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + clear_nlink(&vnode->netfs.inode); + __afs_break_callback(vnode, afs_cb_break_for_deleted); + op->flags &= ~AFS_OPERATION_DIR_CONFLICT; + } + } else if (vp->scb.have_status) { + if (vp->speculative && + (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) || + vp->dv_before != vnode->status.data_version)) + /* Ignore the result of a speculative bulk status fetch + * if it splits around a modification op, thereby + * appearing to regress the data version. + */ + goto out; + afs_apply_status(op, vp); + if (vp->scb.have_cb) + afs_apply_callback(op, vp); + } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) { + drop_nlink(&vnode->netfs.inode); + if (vnode->netfs.inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_nlink(&vnode->vfs_inode); __afs_break_callback(vnode, afs_cb_break_for_deleted); } - } else { - if (scb->have_status) - afs_apply_status(fc, vnode, scb, expected_version); - if (scb->have_cb) - afs_apply_callback(fc, vnode, scb, cb_break); } +out: write_sequnlock(&vnode->cb_lock); - if (fc->ac.error == 0 && scb->have_status) - afs_cache_permit(vnode, fc->key, cb_break, scb); + if (vp->scb.have_status) + afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); +} + +static void afs_fetch_status_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + struct afs_vnode *vnode = vp->vnode; + int ret; + + if (vnode->netfs.inode.i_state & I_NEW) { + ret = afs_inode_init_from_status(op, vp, vnode); + op->error = ret; + if (ret == 0) + afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); + } else { + afs_vnode_commit_status(op, vp); + } } +const struct afs_operation_ops afs_fetch_status_operation = { + .issue_afs_rpc = afs_fs_fetch_status, + .issue_yfs_rpc = yfs_fs_fetch_status, + .success = afs_fetch_status_success, + .aborted = afs_check_for_remote_deletion, +}; + /* * Fetch file status from the volume. */ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new, afs_access_t *_caller_access) { - struct afs_status_cb *scb; - struct afs_fs_cursor fc; - int ret; + struct afs_operation *op; _enter("%s,{%llx:%llu.%u,S=%lx}", vnode->volume->name, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, vnode->flags); - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - return -ENOMEM; - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; + op = afs_alloc_operation(key, vnode->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_fetch_file_status(&fc, scb, NULL); - } + afs_op_set_vnode(op, 0, vnode); - if (fc.error) { - /* Do nothing. */ - } else if (is_new) { - ret = afs_inode_init_from_status(vnode, key, fc.cbi, - NULL, scb); - fc.error = ret; - if (ret == 0) - afs_cache_permit(vnode, key, fc.cb_break, scb); - } else { - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - } - afs_check_for_remote_deletion(&fc, vnode); - ret = afs_end_vnode_operation(&fc); - } + op->nr_files = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); - if (ret == 0 && _caller_access) - *_caller_access = scb->status.caller_access; - kfree(scb); - _leave(" = %d", ret); - return ret; + if (_caller_access) + *_caller_access = op->file[0].scb.status.caller_access; + return afs_put_operation(op); } /* - * iget5() comparator + * ilookup() comparator */ -int afs_iget5_test(struct inode *inode, void *opaque) +int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque) { - struct afs_iget_data *iget_data = opaque; struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_fid *fid = opaque; - return memcmp(&vnode->fid, &iget_data->fid, sizeof(iget_data->fid)) == 0; + return (fid->vnode == vnode->fid.vnode && + fid->vnode_hi == vnode->fid.vnode_hi && + fid->unique == vnode->fid.unique); } /* - * iget5() comparator for inode created by autocell operations - * - * These pseudo inodes don't match anything. + * iget5() comparator */ -static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque) +static int afs_iget5_test(struct inode *inode, void *opaque) { - return 0; + struct afs_vnode_param *vp = opaque; + //struct afs_vnode *vnode = AFS_FS_I(inode); + + return afs_ilookup5_test_by_fid(inode, &vp->fid); } /* @@ -374,191 +403,169 @@ static int afs_iget5_pseudo_dir_test(struct inode *inode, void *opaque) */ static int afs_iget5_set(struct inode *inode, void *opaque) { - struct afs_iget_data *iget_data = opaque; + struct afs_vnode_param *vp = opaque; + struct afs_super_info *as = AFS_FS_S(inode->i_sb); struct afs_vnode *vnode = AFS_FS_I(inode); - vnode->fid = iget_data->fid; - vnode->volume = iget_data->volume; - vnode->cb_v_break = iget_data->cb_v_break; - vnode->cb_s_break = iget_data->cb_s_break; + vnode->volume = as->volume; + vnode->fid = vp->fid; /* YFS supports 96-bit vnode IDs, but Linux only supports * 64-bit inode numbers. */ - inode->i_ino = iget_data->fid.vnode; - inode->i_generation = iget_data->fid.unique; + inode->i_ino = vnode->fid.vnode; + inode->i_generation = vnode->fid.unique; return 0; } /* - * Create an inode for a dynamic root directory or an autocell dynamic - * automount dir. + * Get a cache cookie for an inode. */ -struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) +static void afs_get_inode_cache(struct afs_vnode *vnode) { - struct afs_super_info *as; - struct afs_vnode *vnode; - struct inode *inode; - static atomic_t afs_autocell_ino; +#ifdef CONFIG_AFS_FSCACHE + struct { + __be32 vnode_id; + __be32 unique; + __be32 vnode_id_ext[2]; /* Allow for a 96-bit key */ + } __packed key; + struct afs_vnode_cache_aux aux; - struct afs_iget_data iget_data = { - .cb_v_break = 0, - .cb_s_break = 0, - }; + if (vnode->status.type != AFS_FTYPE_FILE) { + vnode->netfs.cache = NULL; + return; + } - _enter(""); + key.vnode_id = htonl(vnode->fid.vnode); + key.unique = htonl(vnode->fid.unique); + key.vnode_id_ext[0] = htonl(vnode->fid.vnode >> 32); + key.vnode_id_ext[1] = htonl(vnode->fid.vnode_hi); + afs_set_cache_aux(vnode, &aux); + + afs_vnode_set_cache(vnode, + fscache_acquire_cookie( + vnode->volume->cache, + vnode->status.type == AFS_FTYPE_FILE ? + 0 : FSCACHE_ADV_SINGLE_CHUNK, + &key, sizeof(key), + &aux, sizeof(aux), + vnode->status.size)); +#endif +} - as = sb->s_fs_info; - if (as->volume) { - iget_data.volume = as->volume; - iget_data.fid.vid = as->volume->vid; - } - if (root) { - iget_data.fid.vnode = 1; - iget_data.fid.unique = 1; - } else { - iget_data.fid.vnode = atomic_inc_return(&afs_autocell_ino); - iget_data.fid.unique = 0; - } +/* + * inode retrieval + */ +struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp) +{ + struct afs_vnode_param *dvp = &op->file[0]; + struct super_block *sb = dvp->vnode->netfs.inode.i_sb; + struct afs_vnode *vnode; + struct inode *inode; + int ret; + + _enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique); - inode = iget5_locked(sb, iget_data.fid.vnode, - afs_iget5_pseudo_dir_test, afs_iget5_set, - &iget_data); + inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }", - inode, inode->i_ino, iget_data.fid.vid, iget_data.fid.vnode, - iget_data.fid.unique); - vnode = AFS_FS_I(inode); - /* there shouldn't be an existing inode */ - BUG_ON(!(inode->i_state & I_NEW)); + _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }", + inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - inode->i_size = 0; - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; - if (root) { - inode->i_op = &afs_dynroot_inode_operations; - inode->i_fop = &simple_dir_operations; - } else { - inode->i_op = &afs_autocell_inode_operations; - } - set_nlink(inode, 2); - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode); - inode->i_blocks = 0; - inode_set_iversion_raw(inode, 0); - inode->i_generation = 0; - - set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); - if (!root) { - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - inode->i_flags |= S_AUTOMOUNT; + /* deal with an existing inode */ + if (!(inode->i_state & I_NEW)) { + _leave(" = %p", inode); + return inode; } - inode->i_flags |= S_NOATIME; + ret = afs_inode_init_from_status(op, vp, vnode); + if (ret < 0) + goto bad_inode; + + afs_get_inode_cache(vnode); + + /* success */ + clear_bit(AFS_VNODE_UNSET, &vnode->flags); unlock_new_inode(inode); _leave(" = %p", inode); return inode; + + /* failure */ +bad_inode: + iget_failed(inode); + _leave(" = %d [bad]", ret); + return ERR_PTR(ret); } -/* - * Get a cache cookie for an inode. - */ -static void afs_get_inode_cache(struct afs_vnode *vnode) +static int afs_iget5_set_root(struct inode *inode, void *opaque) { -#ifdef CONFIG_AFS_FSCACHE - struct { - u32 vnode_id; - u32 unique; - u32 vnode_id_ext[2]; /* Allow for a 96-bit key */ - } __packed key; - struct afs_vnode_cache_aux aux; - - if (vnode->status.type == AFS_FTYPE_DIR) { - vnode->cache = NULL; - return; - } + struct afs_super_info *as = AFS_FS_S(inode->i_sb); + struct afs_vnode *vnode = AFS_FS_I(inode); - key.vnode_id = vnode->fid.vnode; - key.unique = vnode->fid.unique; - key.vnode_id_ext[0] = vnode->fid.vnode >> 32; - key.vnode_id_ext[1] = vnode->fid.vnode_hi; - aux.data_version = vnode->status.data_version; - - vnode->cache = fscache_acquire_cookie(vnode->volume->cache, - &afs_vnode_cache_index_def, - &key, sizeof(key), - &aux, sizeof(aux), - vnode, vnode->status.size, true); -#endif + vnode->volume = as->volume; + vnode->fid.vid = as->volume->vid, + vnode->fid.vnode = 1; + vnode->fid.unique = 1; + inode->i_ino = 1; + inode->i_generation = 1; + return 0; } /* - * inode retrieval + * Set up the root inode for a volume. This is always vnode 1, unique 1 within + * the volume. */ -struct inode *afs_iget(struct super_block *sb, struct key *key, - struct afs_iget_data *iget_data, - struct afs_status_cb *scb, - struct afs_cb_interest *cbi, - struct afs_vnode *parent_vnode) +struct inode *afs_root_iget(struct super_block *sb, struct key *key) { - struct afs_super_info *as; + struct afs_super_info *as = AFS_FS_S(sb); + struct afs_operation *op; struct afs_vnode *vnode; - struct afs_fid *fid = &iget_data->fid; struct inode *inode; int ret; - _enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique); + _enter(",{%llx},,", as->volume->vid); - as = sb->s_fs_info; - iget_data->volume = as->volume; - - inode = iget5_locked(sb, fid->vnode, afs_iget5_test, afs_iget5_set, - iget_data); + inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL); if (!inode) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } - _debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }", - inode, fid->vid, fid->vnode, fid->unique); + _debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid); + + BUG_ON(!(inode->i_state & I_NEW)); vnode = AFS_FS_I(inode); + vnode->cb_v_break = as->volume->cb_v_break, + afs_set_netfs_context(vnode); - /* deal with an existing inode */ - if (!(inode->i_state & I_NEW)) { - _leave(" = %p", inode); - return inode; + op = afs_alloc_operation(key, as->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto error; } - if (!scb) { - /* it's a remotely extant inode */ - ret = afs_fetch_status(vnode, key, true, NULL); - if (ret < 0) - goto bad_inode; - } else { - ret = afs_inode_init_from_status(vnode, key, cbi, parent_vnode, - scb); - if (ret < 0) - goto bad_inode; - } + afs_op_set_vnode(op, 0, vnode); + + op->nr_files = 1; + op->ops = &afs_fetch_status_operation; + ret = afs_do_sync_operation(op); + if (ret < 0) + goto error; afs_get_inode_cache(vnode); - /* success */ clear_bit(AFS_VNODE_UNSET, &vnode->flags); - inode->i_flags |= S_NOATIME; unlock_new_inode(inode); _leave(" = %p", inode); return inode; - /* failure */ -bad_inode: +error: iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); @@ -568,21 +575,49 @@ bad_inode: * mark the data attached to an inode as obsolete due to a write on the server * - might also want to ditch all the outstanding writes and dirty pages */ -void afs_zap_data(struct afs_vnode *vnode) +static void afs_zap_data(struct afs_vnode *vnode) { _enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode); -#ifdef CONFIG_AFS_FSCACHE - fscache_invalidate(vnode->cache); -#endif + afs_invalidate_cache(vnode, 0); /* nuke all the non-dirty pages that aren't locked, mapped or being * written back in a regular file and completely discard the pages in a * directory or symlink */ - if (S_ISREG(vnode->vfs_inode.i_mode)) - invalidate_remote_inode(&vnode->vfs_inode); + if (S_ISREG(vnode->netfs.inode.i_mode)) + invalidate_remote_inode(&vnode->netfs.inode); else - invalidate_inode_pages2(vnode->vfs_inode.i_mapping); + invalidate_inode_pages2(vnode->netfs.inode.i_mapping); +} + +/* + * Check to see if we have a server currently serving this volume and that it + * hasn't been reinitialised or dropped from the list. + */ +static bool afs_check_server_good(struct afs_vnode *vnode) +{ + struct afs_server_list *slist; + struct afs_server *server; + bool good; + int i; + + if (vnode->cb_fs_s_break == atomic_read(&vnode->volume->cell->fs_s_break)) + return true; + + rcu_read_lock(); + + slist = rcu_dereference(vnode->volume->servers); + for (i = 0; i < slist->nr_servers; i++) { + server = slist->servers[i].server; + if (server == vnode->cb_server) { + good = (vnode->cb_s_break == server->cb_s_break); + rcu_read_unlock(); + return good; + } + } + + rcu_read_unlock(); + return false; } /* @@ -590,62 +625,46 @@ void afs_zap_data(struct afs_vnode *vnode) */ bool afs_check_validity(struct afs_vnode *vnode) { - struct afs_cb_interest *cbi; - struct afs_server *server; - struct afs_volume *volume = vnode->volume; enum afs_cb_break_reason need_clear = afs_cb_break_no_break; time64_t now = ktime_get_real_seconds(); - bool valid; - unsigned int cb_break, cb_s_break, cb_v_break; + unsigned int cb_break; int seq = 0; do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); - cb_v_break = READ_ONCE(volume->cb_v_break); cb_break = vnode->cb_break; if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - cbi = rcu_dereference(vnode->cb_interest); - server = rcu_dereference(cbi->server); - cb_s_break = READ_ONCE(server->cb_s_break); - - if (vnode->cb_s_break != cb_s_break || - vnode->cb_v_break != cb_v_break) { - vnode->cb_s_break = cb_s_break; - vnode->cb_v_break = cb_v_break; - need_clear = afs_cb_break_for_vsbreak; - valid = false; - } else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { + if (vnode->cb_v_break != vnode->volume->cb_v_break) + need_clear = afs_cb_break_for_v_break; + else if (!afs_check_server_good(vnode)) + need_clear = afs_cb_break_for_s_reinit; + else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) need_clear = afs_cb_break_for_zap; - valid = false; - } else if (vnode->cb_expires_at - 10 <= now) { + else if (vnode->cb_expires_at - 10 <= now) need_clear = afs_cb_break_for_lapsed; - valid = false; - } else { - valid = true; - } } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { - valid = true; + ; } else { - vnode->cb_v_break = cb_v_break; - valid = false; + need_clear = afs_cb_break_no_promise; } } while (need_seqretry(&vnode->cb_lock, seq)); done_seqretry(&vnode->cb_lock, seq); - if (need_clear != afs_cb_break_no_break) { - write_seqlock(&vnode->cb_lock); - if (cb_break == vnode->cb_break) - __afs_break_callback(vnode, need_clear); - else - trace_afs_cb_miss(&vnode->fid, need_clear); - write_sequnlock(&vnode->cb_lock); - valid = false; - } + if (need_clear == afs_cb_break_no_break) + return true; - return valid; + write_seqlock(&vnode->cb_lock); + if (need_clear == afs_cb_break_no_promise) + vnode->cb_v_break = vnode->volume->cb_v_break; + else if (cb_break == vnode->cb_break) + __afs_break_callback(vnode, need_clear); + else + trace_afs_cb_miss(&vnode->fid, need_clear); + write_sequnlock(&vnode->cb_lock); + return false; } /* @@ -659,21 +678,20 @@ bool afs_check_validity(struct afs_vnode *vnode) */ int afs_validate(struct afs_vnode *vnode, struct key *key) { - bool valid; int ret; _enter("{v={%llx:%llu} fl=%lx},%x", vnode->fid.vid, vnode->fid.vnode, vnode->flags, key_serial(key)); - rcu_read_lock(); - valid = afs_check_validity(vnode); - rcu_read_unlock(); - - if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) - clear_nlink(&vnode->vfs_inode); + if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) { + if (vnode->netfs.inode.i_nlink) + clear_nlink(&vnode->netfs.inode); + goto valid; + } - if (valid) + if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags) && + afs_check_validity(vnode)) goto valid; down_write(&vnode->validate_lock); @@ -719,18 +737,34 @@ error_unlock: /* * read the attributes of an inode */ -int afs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) +int afs_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct afs_vnode *vnode = AFS_FS_I(inode); - int seq = 0; + struct key *key; + int ret, seq = 0; _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); + if (vnode->volume && + !(query_flags & AT_STATX_DONT_SYNC) && + !test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { + key = afs_request_key(vnode->volume->cell); + if (IS_ERR(key)) + return PTR_ERR(key); + ret = afs_validate(vnode, key); + key_put(key); + if (ret < 0) + return ret; + } + do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); - generic_fillattr(inode, stat); + generic_fillattr(&init_user_ns, inode, stat); + if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) && + stat->nlink > 0) + stat->nlink -= 1; } while (need_seqretry(&vnode->cb_lock, seq)); done_seqretry(&vnode->cb_lock, seq); @@ -755,10 +789,8 @@ int afs_drop_inode(struct inode *inode) */ void afs_evict_inode(struct inode *inode) { - struct afs_cb_interest *cbi; - struct afs_vnode *vnode; - - vnode = AFS_FS_I(inode); + struct afs_vnode_cache_aux aux; + struct afs_vnode *vnode = AFS_FS_I(inode); _enter("{%llx:%llu.%d}", vnode->fid.vid, @@ -770,16 +802,10 @@ void afs_evict_inode(struct inode *inode) ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - write_seqlock(&vnode->cb_lock); - cbi = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->cb_lock.lock)); - if (cbi) { - afs_put_cb_interest(afs_i2net(inode), cbi); - rcu_assign_pointer(vnode->cb_interest, NULL); - } - write_sequnlock(&vnode->cb_lock); + afs_set_cache_aux(vnode, &aux); + fscache_clear_inode_writeback(afs_vnode_cache(vnode), inode, &aux); + clear_inode(inode); while (!list_empty(&vnode->wb_keys)) { struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next, @@ -788,16 +814,8 @@ void afs_evict_inode(struct inode *inode) afs_put_wb_key(wbk); } -#ifdef CONFIG_AFS_FSCACHE - { - struct afs_vnode_cache_aux aux; - - aux.data_version = vnode->status.data_version; - fscache_relinquish_cookie(vnode->cache, &aux, - test_bit(AFS_VNODE_DELETED, &vnode->flags)); - vnode->cache = NULL; - } -#endif + fscache_relinquish_cookie(afs_vnode_cache(vnode), + test_bit(AFS_VNODE_DELETED, &vnode->flags)); afs_prune_wb_keys(vnode); afs_put_permits(rcu_access_pointer(vnode->permit_cache)); @@ -808,69 +826,139 @@ void afs_evict_inode(struct inode *inode) _leave(""); } +static void afs_setattr_success(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->netfs.inode; + loff_t old_i_size = i_size_read(inode); + + op->setattr.old_i_size = old_i_size; + afs_vnode_commit_status(op, vp); + /* inode->i_size has now been changed. */ + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + if (size > old_i_size) + pagecache_isize_extended(inode, old_i_size, size); + } +} + +static void afs_setattr_edit_file(struct afs_operation *op) +{ + struct afs_vnode_param *vp = &op->file[0]; + struct inode *inode = &vp->vnode->netfs.inode; + + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t size = op->setattr.attr->ia_size; + loff_t i_size = op->setattr.old_i_size; + + if (size < i_size) + truncate_pagecache(inode, size); + if (size != i_size) + fscache_resize_cookie(afs_vnode_cache(vp->vnode), + vp->scb.status.size); + } +} + +static const struct afs_operation_ops afs_setattr_operation = { + .issue_afs_rpc = afs_fs_setattr, + .issue_yfs_rpc = yfs_fs_setattr, + .success = afs_setattr_success, + .edit_dir = afs_setattr_edit_file, +}; + /* * set the attributes of an inode */ -int afs_setattr(struct dentry *dentry, struct iattr *attr) +int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + const unsigned int supported = + ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | + ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - struct key *key; - int ret = -ENOMEM; + struct inode *inode = &vnode->netfs.inode; + loff_t i_size; + int ret; _enter("{%llx:%llu},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); - if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | - ATTR_MTIME))) { + if (!(attr->ia_valid & supported)) { _leave(" = 0 [unsupported]"); return 0; } - scb = kzalloc(sizeof(struct afs_status_cb), GFP_KERNEL); - if (!scb) - goto error; + i_size = i_size_read(inode); + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode)) + return -EISDIR; - /* flush any dirty data outstanding on a regular file */ - if (S_ISREG(vnode->vfs_inode.i_mode)) - filemap_write_and_wait(vnode->vfs_inode.i_mapping); + ret = inode_newsize_ok(inode, attr->ia_size); + if (ret) + return ret; - if (attr->ia_valid & ATTR_FILE) { - key = afs_file_key(attr->ia_file); - } else { - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } + if (attr->ia_size == i_size) + attr->ia_valid &= ~ATTR_SIZE; } - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, false)) { - afs_dataversion_t data_version = vnode->status.data_version; + fscache_use_cookie(afs_vnode_cache(vnode), true); - if (attr->ia_valid & ATTR_SIZE) - data_version++; + /* Prevent any new writebacks from starting whilst we do this. */ + down_write(&vnode->validate_lock); + + if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) { + loff_t size = attr->ia_size; + + /* Wait for any outstanding writes to the server to complete */ + loff_t from = min(size, i_size); + loff_t to = max(size, i_size); + ret = filemap_fdatawait_range(inode->i_mapping, from, to); + if (ret < 0) + goto out_unlock; - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_setattr(&fc, attr, scb); + /* Don't talk to the server if we're just shortening in-memory + * writes that haven't gone to the server yet. + */ + if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) && + attr->ia_size < i_size && + attr->ia_size > vnode->status.size) { + truncate_pagecache(inode, attr->ia_size); + fscache_resize_cookie(afs_vnode_cache(vnode), + attr->ia_size); + i_size_write(inode, attr->ia_size); + ret = 0; + goto out_unlock; } + } - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); + op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ? + afs_file_key(attr->ia_file) : NULL), + vnode->volume); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto out_unlock; } - if (!(attr->ia_valid & ATTR_FILE)) - key_put(key); + afs_op_set_vnode(op, 0, vnode); + op->setattr.attr = attr; -error_scb: - kfree(scb); -error: + if (attr->ia_valid & ATTR_SIZE) { + op->file[0].dv_delta = 1; + op->file[0].set_size = true; + } + op->ctime = attr->ia_ctime; + op->file[0].update_ctime = 1; + op->file[0].modification = true; + + op->ops = &afs_setattr_operation; + ret = afs_do_sync_operation(op); + +out_unlock: + up_write(&vnode->validate_lock); + fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 35f951ac296f..723d162078a3 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -31,6 +31,7 @@ struct pagevec; struct afs_call; +struct afs_vnode; /* * Partial file-locking emulation mode. (The problem being that AFS3 only @@ -59,13 +60,6 @@ struct afs_fs_context { struct key *key; /* key to use for secure mounting */ }; -struct afs_iget_data { - struct afs_fid fid; - struct afs_volume *volume; /* volume on which resides */ - unsigned int cb_v_break; /* Pre-fetch volume break count */ - unsigned int cb_s_break; /* Pre-fetch server break count */ -}; - enum afs_call_state { AFS_CALL_CL_REQUESTING, /* Client: Request is being sent */ AFS_CALL_CL_AWAIT_REPLY, /* Client: Awaiting reply */ @@ -90,7 +84,6 @@ struct afs_addr_list { unsigned char nr_ipv4; /* Number of IPv4 addresses */ enum dns_record_source source:8; enum dns_lookup_status status:8; - unsigned long probed; /* Mask of servers that have been probed */ unsigned long failed; /* Mask of addrs that failed locally/ICMP */ unsigned long responded; /* Mask of addrs that responded */ struct sockaddr_rxrpc addrs[]; @@ -111,11 +104,10 @@ struct afs_call { struct afs_net *net; /* The network namespace */ struct afs_server *server; /* The fileserver record if fs op (pins ref) */ struct afs_vlserver *vlserver; /* The vlserver record if vl op */ - struct afs_cb_interest *cbi; /* Callback interest for server used */ - struct afs_vnode *lvnode; /* vnode being locked */ void *request; /* request data (first part) */ - struct address_space *mapping; /* Pages being written from */ + size_t iov_len; /* Size of *iter to be used */ struct iov_iter def_iter; /* Default buffer/data iterator */ + struct iov_iter *write_iter; /* Iterator defining write to be made */ struct iov_iter *iter; /* Iterator currently in use */ union { /* Convenience for ->def_iter */ struct kvec kvec[1]; @@ -126,41 +118,27 @@ struct afs_call { long ret0; /* Value to reply with instead of 0 */ struct afs_addr_list *ret_alist; struct afs_vldb_entry *ret_vldb; - struct afs_acl *ret_acl; + char *ret_str; }; - struct afs_fid *out_fid; - struct afs_status_cb *out_dir_scb; - struct afs_status_cb *out_scb; - struct yfs_acl *out_yacl; - struct afs_volsync *out_volsync; - struct afs_volume_status *out_volstatus; - struct afs_read *read_request; + struct afs_operation *op; unsigned int server_index; - pgoff_t first; /* first page in mapping to deal with */ - pgoff_t last; /* last page in mapping to deal with */ - atomic_t usage; + refcount_t ref; enum afs_call_state state; spinlock_t state_lock; int error; /* error code */ u32 abort_code; /* Remote abort ID or 0 */ - u32 epoch; unsigned int max_lifespan; /* Maximum lifespan to set if not 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned first_offset; /* offset into mapping[first] */ - union { - unsigned last_to; /* amount of mapping[last] */ - unsigned count2; /* count used in unmarshalling */ - }; + unsigned count2; /* count used in unmarshalling */ unsigned char unmarshall; /* unmarshalling phase */ unsigned char addr_ix; /* Address in ->alist */ - bool incoming; /* T if incoming call */ - bool send_pages; /* T if data from mapping should be sent */ + bool drop_ref; /* T if need to drop ref for incoming call */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ bool upgrade; /* T to request service upgrade */ - bool have_reply_time; /* T if have got reply_time */ bool intr; /* T if interruptible */ + bool unmarshalling_error; /* T if an unmarshalling error occurred */ u16 service_id; /* Actual service ID (after upgrade) */ unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ @@ -172,7 +150,7 @@ struct afs_call { } __attribute__((packed)); __be64 tmp64; }; - ktime_t reply_time; /* Time of first reply packet */ + ktime_t issue_time; /* Time of issue of operation */ }; struct afs_call_type { @@ -225,17 +203,19 @@ struct afs_read { loff_t pos; /* Where to start reading */ loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ - loff_t remain; /* Amount remaining */ loff_t file_size; /* File size returned by server */ + struct key *key; /* The key to use to reissue the read */ + struct afs_vnode *vnode; /* The file being read into. */ + struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */ afs_dataversion_t data_version; /* Version number returned by server */ refcount_t usage; - unsigned int index; /* Which page we're reading into */ + unsigned int call_debug_id; unsigned int nr_pages; - unsigned int offset; /* offset into current page */ - struct afs_vnode *vnode; - void (*page_done)(struct afs_read *); - struct page **pages; - struct page *array[]; + int error; + void (*done)(struct afs_read *); + void (*cleanup)(struct afs_read *); + struct iov_iter *iter; /* Iterator representing the buffer */ + struct iov_iter def_iter; /* Default iterator */ }; /* @@ -286,11 +266,12 @@ struct afs_net { /* Cell database */ struct rb_root cells; - struct afs_cell __rcu *ws_cell; + struct afs_cell *ws_cell; struct work_struct cells_manager; struct timer_list cells_timer; atomic_t cells_outstanding; - seqlock_t cells_lock; + struct rw_semaphore cells_lock; + struct mutex cells_alias_lock; struct mutex proc_cells_lock; struct hlist_head proc_cells; @@ -299,9 +280,10 @@ struct afs_net { * cell, but in practice, people create aliases and subsets and there's * no easy way to distinguish them. */ - seqlock_t fs_lock; /* For fs_servers */ + seqlock_t fs_lock; /* For fs_servers, fs_probe_*, fs_proc */ struct rb_root fs_servers; /* afs_server (by server UUID or address) */ - struct list_head fs_updates; /* afs_server (by update_at) */ + struct list_head fs_probe_fast; /* List of afs_server to probe at 30s intervals */ + struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */ struct hlist_head fs_proc; /* procfs servers list */ struct hlist_head fs_addresses4; /* afs_server (by lowest IPv4 addr) */ @@ -310,6 +292,9 @@ struct afs_net { struct work_struct fs_manager; struct timer_list fs_timer; + + struct work_struct fs_prober; + struct timer_list fs_probe_timer; atomic_t servers_outstanding; /* File locking renewal management */ @@ -325,7 +310,7 @@ struct afs_net { atomic_t n_lookup; /* Number of lookups done */ atomic_t n_reval; /* Number of dentries needing revalidation */ atomic_t n_inval; /* Number of invalidations by the server */ - atomic_t n_relpg; /* Number of invalidations by releasepage */ + atomic_t n_relpg; /* Number of invalidations by release_folio */ atomic_t n_read_dir; /* Number of directory pages read */ atomic_t n_dir_cr; /* Number of directory entry creation edits */ atomic_t n_dir_rm; /* Number of directory entry removal edits */ @@ -344,6 +329,7 @@ enum afs_cell_state { AFS_CELL_DEACTIVATING, AFS_CELL_INACTIVE, AFS_CELL_FAILED, + AFS_CELL_REMOVED, }; /* @@ -360,8 +346,10 @@ enum afs_cell_state { * for authentication and encryption. The cell name is not typically used in * the protocol. * - * There is no easy way to determine if two cells are aliases or one is a - * subset of another. + * Two cells are determined to be aliases if they have an explicit alias (YFS + * only), share any VL servers in common or have at least one volume in common. + * "In common" means that the address list of the VL servers or the fileservers + * share at least one endpoint. */ struct afs_cell { union { @@ -369,34 +357,44 @@ struct afs_cell { struct rb_node net_node; /* Node in net->cells */ }; struct afs_net *net; + struct afs_cell *alias_of; /* The cell this is an alias of */ + struct afs_volume *root_volume; /* The root.cell volume if there is one */ struct key *anonymous_key; /* anonymous user key for this cell */ struct work_struct manager; /* Manager for init/deinit/dns */ struct hlist_node proc_link; /* /proc cell list link */ -#ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ -#endif time64_t dns_expiry; /* Time AFSDB/SRV record expires */ time64_t last_inactive; /* Time of last drop of usage count */ - atomic_t usage; + refcount_t ref; /* Struct refcount */ + atomic_t active; /* Active usage counter */ unsigned long flags; #define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */ #define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */ +#define AFS_CELL_FL_CHECK_ALIAS 2 /* Need to check for aliases */ enum afs_cell_state state; short error; enum dns_record_source dns_source:8; /* Latest source of data from lookup */ enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */ unsigned int dns_lookup_count; /* Counter of DNS lookups */ + unsigned int debug_id; + + /* The volumes belonging to this cell */ + struct rb_root volumes; /* Tree of volumes on this server */ + struct hlist_head proc_volumes; /* procfs volume list */ + seqlock_t volume_lock; /* For volumes */ /* Active fileserver interaction state. */ - struct list_head proc_volumes; /* procfs volume list */ - rwlock_t proc_lock; + struct rb_root fs_servers; /* afs_server (by server UUID) */ + seqlock_t fs_lock; /* For fs_servers */ + struct rw_semaphore fs_open_mmaps_lock; + struct list_head fs_open_mmaps; /* List of vnodes that are mmapped */ + atomic_t fs_s_break; /* Counter of CB.InitCallBackState messages */ /* VL server list. */ rwlock_t vl_servers_lock; /* Lock on vl_servers */ struct afs_vlserver_list __rcu *vl_servers; u8 name_len; /* Length of name */ - char name[64 + 1]; /* Cell name, case-flattened and NUL-padded */ + char *name; /* Cell name, case-flattened and NUL-padded */ }; /* @@ -409,22 +407,24 @@ struct afs_vlserver { #define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */ #define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */ #define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */ +#define AFS_VLSERVER_FL_RESPONDING 3 /* VL server is responding */ rwlock_t lock; /* Lock on addresses */ - atomic_t usage; + refcount_t ref; + unsigned int rtt; /* Server's current RTT in uS */ /* Probe state */ wait_queue_head_t probe_wq; atomic_t probe_outstanding; spinlock_t probe_lock; struct { - unsigned int rtt; /* RTT as ktime/64 */ + unsigned int rtt; /* RTT in uS */ u32 abort_code; short error; - bool have_result; - bool responded:1; - bool is_yfs:1; - bool not_yfs:1; - bool local_failure:1; + unsigned short flags; +#define AFS_VLSERVER_PROBE_RESPONDED 0x01 /* At least once response (may be abort) */ +#define AFS_VLSERVER_PROBE_IS_YFS 0x02 /* The peer appears to be YFS */ +#define AFS_VLSERVER_PROBE_NOT_YFS 0x04 /* The peer appears not to be YFS */ +#define AFS_VLSERVER_PROBE_LOCAL_FAILURE 0x08 /* A local failure prevented a probe */ } probe; u16 port; @@ -445,7 +445,7 @@ struct afs_vlserver_entry { struct afs_vlserver_list { struct rcu_head rcu; - atomic_t usage; + refcount_t ref; u8 nr_servers; u8 index; /* Server currently in use */ u8 preferred; /* Preferred server */ @@ -471,6 +471,7 @@ struct afs_vldb_entry { #define AFS_VLDB_QUERY_ERROR 4 /* - VL server returned error */ uuid_t fs_server[AFS_NMAXNSERVERS]; + u32 addr_version[AFS_NMAXNSERVERS]; /* Registration change counters */ u8 fs_mask[AFS_NMAXNSERVERS]; #define AFS_VOL_VTM_RW 0x01 /* R/W version of the volume is available (on this server) */ #define AFS_VOL_VTM_RO 0x02 /* R/O version of the volume is available (on this server) */ @@ -492,96 +493,66 @@ struct afs_server { }; struct afs_addr_list __rcu *addresses; - struct rb_node uuid_rb; /* Link in net->servers */ + struct afs_cell *cell; /* Cell to which belongs (pins ref) */ + struct rb_node uuid_rb; /* Link in net->fs_servers */ + struct afs_server __rcu *uuid_next; /* Next server with same UUID */ + struct afs_server *uuid_prev; /* Previous server with same UUID */ + struct list_head probe_link; /* Link in net->fs_probe_list */ struct hlist_node addr4_link; /* Link in net->fs_addresses4 */ struct hlist_node addr6_link; /* Link in net->fs_addresses6 */ struct hlist_node proc_link; /* Link in net->fs_proc */ + struct work_struct initcb_work; /* Work for CB.InitCallBackState* */ struct afs_server *gc_next; /* Next server in manager's list */ - time64_t put_time; /* Time at which last put */ - time64_t update_at; /* Time at which to next update the record */ + time64_t unuse_time; /* Time at which last unused */ unsigned long flags; -#define AFS_SERVER_FL_NOT_READY 1 /* The record is not ready for use */ -#define AFS_SERVER_FL_NOT_FOUND 2 /* VL server says no such server */ -#define AFS_SERVER_FL_VL_FAIL 3 /* Failed to access VL server */ -#define AFS_SERVER_FL_UPDATING 4 -#define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ -#define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ -#define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */ +#define AFS_SERVER_FL_UPDATING 1 +#define AFS_SERVER_FL_NEEDS_UPDATE 2 /* Fileserver address list is out of date */ +#define AFS_SERVER_FL_NOT_READY 4 /* The record is not ready for use */ +#define AFS_SERVER_FL_NOT_FOUND 5 /* VL server says no such server */ +#define AFS_SERVER_FL_VL_FAIL 6 /* Failed to access VL server */ #define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ -#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */ -#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */ -#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */ - atomic_t usage; +#define AFS_SERVER_FL_IS_YFS 16 /* Server is YFS not AFS */ +#define AFS_SERVER_FL_NO_IBULK 17 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_NO_RM2 18 /* Fileserver doesn't support YFS.RemoveFile2 */ +#define AFS_SERVER_FL_HAS_FS64 19 /* Fileserver supports FS.{Fetch,Store}Data64 */ + refcount_t ref; /* Object refcount */ + atomic_t active; /* Active user count */ u32 addr_version; /* Address list version */ - u32 cm_epoch; /* Server RxRPC epoch */ + unsigned int rtt; /* Server's current RTT in uS */ unsigned int debug_id; /* Debugging ID for traces */ /* file service access */ rwlock_t fs_lock; /* access lock */ /* callback promise management */ - struct hlist_head cb_volumes; /* List of volume interests on this server */ unsigned cb_s_break; /* Break-everything counter. */ - rwlock_t cb_break_lock; /* Volume finding lock */ /* Probe state */ + unsigned long probed_at; /* Time last probe was dispatched (jiffies) */ wait_queue_head_t probe_wq; atomic_t probe_outstanding; spinlock_t probe_lock; struct { - unsigned int rtt; /* RTT as ktime/64 */ + unsigned int rtt; /* RTT in uS */ u32 abort_code; - u32 cm_epoch; short error; - bool have_result; bool responded:1; bool is_yfs:1; bool not_yfs:1; bool local_failure:1; - bool no_epoch:1; - bool cm_probed:1; - bool said_rebooted:1; - bool said_inconsistent:1; } probe; }; /* - * Volume collation in the server's callback interest list. - */ -struct afs_vol_interest { - struct hlist_node srv_link; /* Link in server->cb_volumes */ - struct hlist_head cb_interests; /* List of callback interests on the server */ - union { - struct rcu_head rcu; - afs_volid_t vid; /* Volume ID to match */ - }; - unsigned int usage; -}; - -/* - * Interest by a superblock on a server. - */ -struct afs_cb_interest { - struct hlist_node cb_vlink; /* Link in vol_interest->cb_interests */ - struct afs_vol_interest *vol_interest; - struct afs_server *server; /* Server on which this interest resides */ - struct super_block *sb; /* Superblock on which inodes reside */ - union { - struct rcu_head rcu; - afs_volid_t vid; /* Volume ID to match */ - }; - refcount_t usage; -}; - -/* - * Replaceable server list. + * Replaceable volume server list. */ struct afs_server_entry { struct afs_server *server; - struct afs_cb_interest *cb_interest; }; struct afs_server_list { + afs_volid_t vids[AFS_MAXTYPES]; /* Volume IDs */ refcount_t usage; unsigned char nr_servers; unsigned char preferred; /* Preferred server */ @@ -595,11 +566,16 @@ struct afs_server_list { * Live AFS volume management. */ struct afs_volume { - afs_volid_t vid; /* volume ID */ - atomic_t usage; + union { + struct rcu_head rcu; + afs_volid_t vid; /* volume ID */ + }; + refcount_t ref; time64_t update_at; /* Time at which to next update */ struct afs_cell *cell; /* Cell to which belongs (pins ref) */ - struct list_head proc_link; /* Link in cell->vl_proc */ + struct rb_node cell_node; /* Link in cell->volumes */ + struct hlist_node proc_link; /* Link in cell->proc_volumes */ + struct super_block __rcu *sb; /* Superblock on which inodes reside */ unsigned long flags; #define AFS_VOLUME_NEEDS_UPDATE 0 /* - T if an update needs performing */ #define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */ @@ -607,10 +583,11 @@ struct afs_volume { #define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */ #define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */ #define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */ +#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */ #ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ + struct fscache_volume *cache; /* Caching cookie */ #endif - struct afs_server_list *servers; /* List of servers on which volume resides */ + struct afs_server_list __rcu *servers; /* List of servers on which volume resides */ rwlock_t servers_lock; /* Lock for ->servers */ unsigned int servers_seq; /* Incremented each time ->servers changes */ @@ -618,7 +595,6 @@ struct afs_volume { rwlock_t cb_v_break_lock; afs_voltype_t type; /* type of volume */ - short error; char type_force; /* force volume type (suppress R/O -> R/W) */ u8 name_len; u8 name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */ @@ -642,15 +618,11 @@ enum afs_lock_state { * leak from one inode to another. */ struct afs_vnode { - struct inode vfs_inode; /* the VFS's inode record */ - + struct netfs_inode netfs; /* Netfslib context and vfs inode */ struct afs_volume *volume; /* volume on which vnode resides */ struct afs_fid fid; /* the file identifier for this inode */ struct afs_file_status status; /* AFS status info for this file */ afs_dataversion_t invalid_before; /* Child dentries are invalid before this */ -#ifdef CONFIG_AFS_FSCACHE - struct fscache_cookie *cache; /* caching cookie */ -#endif struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ struct rw_semaphore validate_lock; /* lock for validating this vnode */ @@ -668,6 +640,8 @@ struct afs_vnode { #define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ #define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ +#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */ +#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ @@ -679,11 +653,15 @@ struct afs_vnode { afs_lock_type_t lock_type : 8; /* outstanding callback notification on this file */ - struct afs_cb_interest __rcu *cb_interest; /* Server on which this resides */ + struct work_struct cb_work; /* Work for mmap'd files */ + struct list_head cb_mmap_link; /* Link in cell->fs_open_mmaps */ + void *cb_server; /* Server with callback/filelock */ + atomic_t cb_nr_mmap; /* Number of mmaps */ + unsigned int cb_fs_s_break; /* Mass server break counter (cell->fs_s_break) */ unsigned int cb_s_break; /* Mass break counter on ->server */ unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ - seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ + seqlock_t cb_lock; /* Lock for ->cb_server, ->status, ->cb_*break */ time64_t cb_expires_at; /* time at which callback expires */ }; @@ -691,12 +669,20 @@ struct afs_vnode { static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) { #ifdef CONFIG_AFS_FSCACHE - return vnode->cache; + return netfs_i_cookie(&vnode->netfs); #else return NULL; #endif } +static inline void afs_vnode_set_cache(struct afs_vnode *vnode, + struct fscache_cookie *cookie) +{ +#ifdef CONFIG_AFS_FSCACHE + vnode->netfs.cache = cookie; +#endif +} + /* * cached security record for one user's attempt to access a vnode */ @@ -760,38 +746,205 @@ struct afs_vl_cursor { }; /* - * Cursor for iterating over a set of fileservers. + * Fileserver operation methods. + */ +struct afs_operation_ops { + void (*issue_afs_rpc)(struct afs_operation *op); + void (*issue_yfs_rpc)(struct afs_operation *op); + void (*success)(struct afs_operation *op); + void (*aborted)(struct afs_operation *op); + void (*failed)(struct afs_operation *op); + void (*edit_dir)(struct afs_operation *op); + void (*put)(struct afs_operation *op); +}; + +struct afs_vnode_param { + struct afs_vnode *vnode; + struct afs_fid fid; /* Fid to access */ + struct afs_status_cb scb; /* Returned status and callback promise */ + afs_dataversion_t dv_before; /* Data version before the call */ + unsigned int cb_break_before; /* cb_break + cb_s_break before the call */ + u8 dv_delta; /* Expected change in data version */ + bool put_vnode:1; /* T if we have a ref on the vnode */ + bool need_io_lock:1; /* T if we need the I/O lock on this */ + bool update_ctime:1; /* Need to update the ctime */ + bool set_size:1; /* Must update i_size */ + bool op_unlinked:1; /* True if file was unlinked by op */ + bool speculative:1; /* T if speculative status fetch (no vnode lock) */ + bool modification:1; /* Set if the content gets modified */ +}; + +/* + * Fileserver operation wrapper, handling server and address rotation + * asynchronously. May make simultaneous calls to multiple servers. */ -struct afs_fs_cursor { +struct afs_operation { + struct afs_net *net; /* Network namespace */ + struct key *key; /* Key for the cell */ const struct afs_call_type *type; /* Type of call done */ + const struct afs_operation_ops *ops; + + /* Parameters/results for the operation */ + struct afs_volume *volume; /* Volume being accessed */ + struct afs_vnode_param file[2]; + struct afs_vnode_param *more_files; + struct afs_volsync volsync; + struct dentry *dentry; /* Dentry to be altered */ + struct dentry *dentry_2; /* Second dentry to be altered */ + struct timespec64 mtime; /* Modification time to record */ + struct timespec64 ctime; /* Change time to set */ + short nr_files; /* Number of entries in file[], more_files */ + short error; + unsigned int debug_id; + + unsigned int cb_v_break; /* Volume break counter before op */ + unsigned int cb_s_break; /* Server break counter before op */ + + union { + struct { + int which; /* Which ->file[] to fetch for */ + } fetch_status; + struct { + int reason; /* enum afs_edit_dir_reason */ + mode_t mode; + const char *symlink; + } create; + struct { + bool need_rehash; + } unlink; + struct { + struct dentry *rehash; + struct dentry *tmp; + bool new_negative; + } rename; + struct { + struct afs_read *req; + } fetch; + struct { + afs_lock_type_t type; + } lock; + struct { + struct iov_iter *write_iter; + loff_t pos; + loff_t size; + loff_t i_size; + bool laundering; /* Laundering page, PG_writeback not set */ + } store; + struct { + struct iattr *attr; + loff_t old_i_size; + } setattr; + struct afs_acl *acl; + struct yfs_acl *yacl; + struct { + struct afs_volume_status vs; + struct kstatfs *buf; + } volstatus; + }; + + /* Fileserver iteration state */ struct afs_addr_cursor ac; - struct afs_vnode *vnode; struct afs_server_list *server_list; /* Current server list (pins ref) */ - struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */ - struct key *key; /* Key for the server */ + struct afs_server *server; /* Server we're using (ref pinned by server_list) */ + struct afs_call *call; unsigned long untried; /* Bitmask of untried servers */ - unsigned int cb_break; /* cb_break + cb_s_break before the call */ - unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */ short index; /* Current server */ - short error; - unsigned short flags; -#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */ -#define AFS_FS_CURSOR_VBUSY 0x0002 /* Set if seen VBUSY */ -#define AFS_FS_CURSOR_VMOVED 0x0004 /* Set if seen VMOVED */ -#define AFS_FS_CURSOR_VNOVOL 0x0008 /* Set if seen VNOVOL */ -#define AFS_FS_CURSOR_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ -#define AFS_FS_CURSOR_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ -#define AFS_FS_CURSOR_INTR 0x0040 /* Set if op is interruptible */ unsigned short nr_iterations; /* Number of server iterations */ + + unsigned int flags; +#define AFS_OPERATION_STOP 0x0001 /* Set to cease iteration */ +#define AFS_OPERATION_VBUSY 0x0002 /* Set if seen VBUSY */ +#define AFS_OPERATION_VMOVED 0x0004 /* Set if seen VMOVED */ +#define AFS_OPERATION_VNOVOL 0x0008 /* Set if seen VNOVOL */ +#define AFS_OPERATION_CUR_ONLY 0x0010 /* Set if current server only (file lock held) */ +#define AFS_OPERATION_NO_VSLEEP 0x0020 /* Set to prevent sleep on VBUSY, VOFFLINE, ... */ +#define AFS_OPERATION_UNINTR 0x0040 /* Set if op is uninterruptible */ +#define AFS_OPERATION_DOWNGRADE 0x0080 /* Set to retry with downgraded opcode */ +#define AFS_OPERATION_LOCK_0 0x0100 /* Set if have io_lock on file[0] */ +#define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */ +#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */ +#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */ +#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */ }; /* * Cache auxiliary data. */ struct afs_vnode_cache_aux { - u64 data_version; + __be64 data_version; } __packed; +static inline void afs_set_cache_aux(struct afs_vnode *vnode, + struct afs_vnode_cache_aux *aux) +{ + aux->data_version = cpu_to_be64(vnode->status.data_version); +} + +static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int flags) +{ + struct afs_vnode_cache_aux aux; + + afs_set_cache_aux(vnode, &aux); + fscache_invalidate(afs_vnode_cache(vnode), &aux, + i_size_read(&vnode->netfs.inode), flags); +} + +/* + * We use folio->private to hold the amount of the folio that we've written to, + * splitting the field into two parts. However, we need to represent a range + * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio + * exceeds what we can encode. + */ +#ifdef CONFIG_64BIT +#define __AFS_FOLIO_PRIV_MASK 0x7fffffffUL +#define __AFS_FOLIO_PRIV_SHIFT 32 +#define __AFS_FOLIO_PRIV_MMAPPED 0x80000000UL +#else +#define __AFS_FOLIO_PRIV_MASK 0x7fffUL +#define __AFS_FOLIO_PRIV_SHIFT 16 +#define __AFS_FOLIO_PRIV_MMAPPED 0x8000UL +#endif + +static inline unsigned int afs_folio_dirty_resolution(struct folio *folio) +{ + int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1); + return (shift > 0) ? shift : 0; +} + +static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv) +{ + unsigned long x = priv & __AFS_FOLIO_PRIV_MASK; + + /* The lower bound is inclusive */ + return x << afs_folio_dirty_resolution(folio); +} + +static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv) +{ + unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK; + + /* The upper bound is immediately beyond the region */ + return (x + 1) << afs_folio_dirty_resolution(folio); +} + +static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to) +{ + unsigned int res = afs_folio_dirty_resolution(folio); + from >>= res; + to = (to - 1) >> res; + return (to << __AFS_FOLIO_PRIV_SHIFT) | from; +} + +static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv) +{ + return priv | __AFS_FOLIO_PRIV_MMAPPED; +} + +static inline bool afs_is_folio_dirty_mmapped(unsigned long priv) +{ + return priv & __AFS_FOLIO_PRIV_MMAPPED; +} + #include <trace/events/afs.h> /*****************************************************************************/ @@ -823,57 +976,43 @@ extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16); */ #ifdef CONFIG_AFS_FSCACHE extern struct fscache_netfs afs_cache_netfs; -extern struct fscache_cookie_def afs_cell_cache_index_def; -extern struct fscache_cookie_def afs_volume_cache_index_def; -extern struct fscache_cookie_def afs_vnode_cache_index_def; -#else -#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) -#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) -#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) #endif /* * callback.c */ +extern void afs_invalidate_mmap_work(struct work_struct *); +extern void afs_server_init_callback_work(struct work_struct *work); extern void afs_init_callback_state(struct afs_server *); extern void __afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); extern void afs_break_callback(struct afs_vnode *, enum afs_cb_break_reason); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break *); -extern int afs_register_server_cb_interest(struct afs_vnode *, - struct afs_server_list *, unsigned int); -extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); -extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); - -static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) -{ - if (cbi) - refcount_inc(&cbi->usage); - return cbi; -} - static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) { return vnode->cb_break + vnode->cb_v_break; } static inline bool afs_cb_is_broken(unsigned int cb_break, - const struct afs_vnode *vnode, - const struct afs_cb_interest *cbi) + const struct afs_vnode *vnode) { - return !cbi || cb_break != (vnode->cb_break + - vnode->volume->cb_v_break); + return cb_break != (vnode->cb_break + vnode->volume->cb_v_break); } /* * cell.c */ extern int afs_cell_init(struct afs_net *, const char *); -extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned); +extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned, + enum afs_cell_trace); extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned, const char *, bool); -extern struct afs_cell *afs_get_cell(struct afs_cell *); -extern void afs_put_cell(struct afs_net *, struct afs_cell *); +extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace); +extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace); +extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace); extern void afs_manage_cells(struct work_struct *); extern void afs_cells_timer(struct timer_list *); extern void __net_exit afs_cell_purge(struct afs_net *); @@ -892,6 +1031,7 @@ extern const struct address_space_operations afs_dir_aops; extern const struct dentry_operations afs_fs_dentry_operations; extern void afs_d_release(struct dentry *); +extern void afs_check_for_remote_deletion(struct afs_operation *); /* * dir_edit.c @@ -922,17 +1062,20 @@ extern void afs_dynroot_depopulate(struct super_block *); /* * file.c */ -extern const struct address_space_operations afs_fs_aops; +extern const struct address_space_operations afs_file_aops; +extern const struct address_space_operations afs_symlink_aops; extern const struct inode_operations afs_file_inode_operations; extern const struct file_operations afs_file_operations; +extern const struct netfs_request_ops afs_req_ops; extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *); extern void afs_put_wb_key(struct afs_wb_key *); extern int afs_open(struct inode *, struct file *); extern int afs_release(struct inode *, struct file *); -extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *); -extern int afs_page_filler(void *, struct page *); +extern int afs_fetch_data(struct afs_vnode *, struct afs_read *); +extern struct afs_read *afs_alloc_read(gfp_t); extern void afs_put_read(struct afs_read *); +extern int afs_write_inode(struct inode *, struct writeback_control *); static inline struct afs_read *afs_get_read(struct afs_read *req) { @@ -954,76 +1097,89 @@ extern int afs_flock(struct file *, int, struct file_lock *); /* * fsclient.c */ -extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *, - struct afs_volsync *); -extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *); -extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *); -extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, - struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); -extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, - struct afs_status_cb *); -extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, - struct afs_status_cb *, struct afs_status_cb *); -extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, - struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); -extern int afs_fs_rename(struct afs_fs_cursor *, const char *, - struct afs_vnode *, const char *, - struct afs_status_cb *, struct afs_status_cb *); -extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *, - pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *); -extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *); -extern int afs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); -extern int afs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *); -extern int afs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *); -extern int afs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *); +extern void afs_fs_fetch_status(struct afs_operation *); +extern void afs_fs_fetch_data(struct afs_operation *); +extern void afs_fs_create_file(struct afs_operation *); +extern void afs_fs_make_dir(struct afs_operation *); +extern void afs_fs_remove_file(struct afs_operation *); +extern void afs_fs_remove_dir(struct afs_operation *); +extern void afs_fs_link(struct afs_operation *); +extern void afs_fs_symlink(struct afs_operation *); +extern void afs_fs_rename(struct afs_operation *); +extern void afs_fs_store_data(struct afs_operation *); +extern void afs_fs_setattr(struct afs_operation *); +extern void afs_fs_get_volume_status(struct afs_operation *); +extern void afs_fs_set_lock(struct afs_operation *); +extern void afs_fs_extend_lock(struct afs_operation *); +extern void afs_fs_release_lock(struct afs_operation *); extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *, struct afs_addr_cursor *, struct key *); -extern struct afs_call *afs_fs_get_capabilities(struct afs_net *, struct afs_server *, - struct afs_addr_cursor *, struct key *, - unsigned int); -extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_status_cb *, - unsigned int, struct afs_volsync *); -extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_status_cb *, - struct afs_volsync *); +extern bool afs_fs_get_capabilities(struct afs_net *, struct afs_server *, + struct afs_addr_cursor *, struct key *); +extern void afs_fs_inline_bulk_status(struct afs_operation *); struct afs_acl { u32 size; u8 data[]; }; -extern struct afs_acl *afs_fs_fetch_acl(struct afs_fs_cursor *, struct afs_status_cb *); -extern int afs_fs_store_acl(struct afs_fs_cursor *, const struct afs_acl *, - struct afs_status_cb *); +extern void afs_fs_fetch_acl(struct afs_operation *); +extern void afs_fs_store_acl(struct afs_operation *); + +/* + * fs_operation.c + */ +extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *); +extern int afs_put_operation(struct afs_operation *); +extern bool afs_begin_vnode_operation(struct afs_operation *); +extern void afs_wait_for_operation(struct afs_operation *); +extern int afs_do_sync_operation(struct afs_operation *); + +static inline void afs_op_nomem(struct afs_operation *op) +{ + op->error = -ENOMEM; +} + +static inline void afs_op_set_vnode(struct afs_operation *op, unsigned int n, + struct afs_vnode *vnode) +{ + op->file[n].vnode = vnode; + op->file[n].need_io_lock = true; +} + +static inline void afs_op_set_fid(struct afs_operation *op, unsigned int n, + const struct afs_fid *fid) +{ + op->file[n].fid = *fid; +} /* * fs_probe.c */ extern void afs_fileserver_probe_result(struct afs_call *); -extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *); +extern void afs_fs_probe_fileserver(struct afs_net *, struct afs_server *, struct key *, bool); extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long); +extern void afs_probe_fileserver(struct afs_net *, struct afs_server *); +extern void afs_fs_probe_dispatcher(struct work_struct *); +extern int afs_wait_for_one_fs_probe(struct afs_server *, bool); +extern void afs_fs_probe_cleanup(struct afs_net *); /* * inode.c */ -extern void afs_vnode_commit_status(struct afs_fs_cursor *, - struct afs_vnode *, - unsigned int, - const afs_dataversion_t *, - struct afs_status_cb *); +extern const struct afs_operation_ops afs_fetch_status_operation; + +extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); -extern int afs_iget5_test(struct inode *, void *); +extern int afs_ilookup5_test_by_fid(struct inode *, void *); extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool); -extern struct inode *afs_iget(struct super_block *, struct key *, - struct afs_iget_data *, struct afs_status_cb *, - struct afs_cb_interest *, - struct afs_vnode *); -extern void afs_zap_data(struct afs_vnode *); +extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *); +extern struct inode *afs_root_iget(struct super_block *, struct key *); extern bool afs_check_validity(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); -extern int afs_setattr(struct dentry *, struct iattr *); +extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *, + struct kstat *, u32, unsigned int); +extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *); extern void afs_evict_inode(struct inode *); extern int afs_drop_inode(struct inode *); @@ -1055,7 +1211,7 @@ static inline struct afs_net *afs_i2net(struct inode *inode) static inline struct afs_net *afs_v2net(struct afs_vnode *vnode) { - return afs_i2net(&vnode->vfs_inode); + return afs_i2net(&vnode->netfs.inode); } static inline struct afs_net *afs_sock2net(struct sock *sk) @@ -1106,11 +1262,8 @@ static inline void afs_put_sysnames(struct afs_sysnames *sysnames) {} /* * rotate.c */ -extern bool afs_begin_vnode_operation(struct afs_fs_cursor *, struct afs_vnode *, - struct key *, bool); -extern bool afs_select_fileserver(struct afs_fs_cursor *); -extern bool afs_select_current_fileserver(struct afs_fs_cursor *); -extern int afs_end_vnode_operation(struct afs_fs_cursor *); +extern bool afs_select_fileserver(struct afs_operation *); +extern void afs_dump_edestaddrreq(const struct afs_operation *); /* * rxrpc.c @@ -1130,16 +1283,22 @@ extern void afs_flat_call_destructor(struct afs_call *); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, bool); -extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause); +extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause); -static inline void afs_set_fc_call(struct afs_call *call, struct afs_fs_cursor *fc) +static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call, + gfp_t gfp) { - call->intr = fc->flags & AFS_FS_CURSOR_INTR; - fc->type = call->type; + op->call = call; + op->type = call->type; + call->op = op; + call->key = op->key; + call->intr = !(op->flags & AFS_OPERATION_UNINTR); + afs_make_call(&op->ac, call, gfp); } static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size) { + call->iov_len = size; call->kvec[0].iov_base = buf; call->kvec[0].iov_len = size; iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size); @@ -1147,21 +1306,25 @@ static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t si static inline void afs_extract_to_tmp(struct afs_call *call) { + call->iov_len = sizeof(call->tmp); afs_extract_begin(call, &call->tmp, sizeof(call->tmp)); } static inline void afs_extract_to_tmp64(struct afs_call *call) { + call->iov_len = sizeof(call->tmp64); afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64)); } static inline void afs_extract_discard(struct afs_call *call, size_t size) { + call->iov_len = size; iov_iter_discard(&call->def_iter, READ, size); } static inline void afs_extract_to_buf(struct afs_call *call, size_t size) { + call->iov_len = size; afs_extract_begin(call, call->buffer, size); } @@ -1209,8 +1372,16 @@ static inline void afs_set_call_complete(struct afs_call *call, ok = true; } spin_unlock_bh(&call->state_lock); - if (ok) + if (ok) { trace_afs_call_done(call); + + /* Asynchronous calls have two refs to release - one from the alloc and + * one queued with the work item - and we can't just deallocate the + * call because the work item may be queued again. + */ + if (call->drop_ref) + afs_put_call(call); + } } /* @@ -1224,7 +1395,7 @@ extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); extern struct key *afs_request_key_rcu(struct afs_cell *); extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *); -extern int afs_permission(struct inode *, int); +extern int afs_permission(struct user_namespace *, struct inode *, int); extern void __exit afs_clean_up_permit_cache(void); /* @@ -1235,13 +1406,33 @@ extern spinlock_t afs_server_peer_lock; extern struct afs_server *afs_find_server(struct afs_net *, const struct sockaddr_rxrpc *); extern struct afs_server *afs_find_server_by_uuid(struct afs_net *, const uuid_t *); -extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *); +extern struct afs_server *afs_lookup_server(struct afs_cell *, struct key *, const uuid_t *, u32); extern struct afs_server *afs_get_server(struct afs_server *, enum afs_server_trace); +extern struct afs_server *afs_use_server(struct afs_server *, enum afs_server_trace); +extern void afs_unuse_server(struct afs_net *, struct afs_server *, enum afs_server_trace); +extern void afs_unuse_server_notime(struct afs_net *, struct afs_server *, enum afs_server_trace); extern void afs_put_server(struct afs_net *, struct afs_server *, enum afs_server_trace); extern void afs_manage_servers(struct work_struct *); extern void afs_servers_timer(struct timer_list *); +extern void afs_fs_probe_timer(struct timer_list *); extern void __net_exit afs_purge_servers(struct afs_net *); -extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *); +extern bool afs_check_server_record(struct afs_operation *, struct afs_server *); + +static inline void afs_inc_servers_outstanding(struct afs_net *net) +{ + atomic_inc(&net->servers_outstanding); +} + +static inline void afs_dec_servers_outstanding(struct afs_net *net) +{ + if (atomic_dec_and_test(&net->servers_outstanding)) + wake_up_var(&net->servers_outstanding); +} + +static inline bool afs_is_probing_server(struct afs_server *server) +{ + return list_empty(&server->probe_link); +} /* * server_list.c @@ -1273,6 +1464,12 @@ extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uu extern struct afs_call *afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *, struct afs_vlserver *, unsigned int); extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *); +extern char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *); + +/* + * vl_alias.c + */ +extern int afs_cell_detect_alias(struct afs_cell *, struct key *); /* * vl_probe.c @@ -1295,14 +1492,14 @@ extern int afs_end_vlserver_operation(struct afs_vl_cursor *); */ static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver) { - atomic_inc(&vlserver->usage); + refcount_inc(&vlserver->ref); return vlserver; } static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist) { if (vllist) - atomic_inc(&vllist->usage); + refcount_inc(&vllist->ref); return vllist; } @@ -1316,25 +1513,23 @@ extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *, /* * volume.c */ -static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) -{ - if (volume) - atomic_inc(&volume->usage); - return volume; -} - extern struct afs_volume *afs_create_volume(struct afs_fs_context *); -extern void afs_activate_volume(struct afs_volume *); +extern int afs_activate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *); -extern void afs_put_volume(struct afs_cell *, struct afs_volume *); -extern int afs_check_volume_status(struct afs_volume *, struct key *); +extern struct afs_volume *afs_get_volume(struct afs_volume *, enum afs_volume_trace); +extern void afs_put_volume(struct afs_net *, struct afs_volume *, enum afs_volume_trace); +extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *); /* * write.c */ -extern int afs_set_page_dirty(struct page *); +#ifdef CONFIG_AFS_FSCACHE +bool afs_dirty_folio(struct address_space *, struct folio *); +#else +#define afs_dirty_folio filemap_dirty_folio +#endif extern int afs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, + loff_t pos, unsigned len, struct page **pagep, void **fsdata); extern int afs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -1345,47 +1540,33 @@ extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_fsync(struct file *, loff_t, loff_t, int); extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf); extern void afs_prune_wb_keys(struct afs_vnode *); -extern int afs_launder_page(struct page *); +int afs_launder_folio(struct folio *); /* * xattr.c */ extern const struct xattr_handler *afs_xattr_handlers[]; -extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c */ -extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_status_cb *, - struct afs_volsync *); -extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_status_cb *, struct afs_read *); -extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *, - struct afs_fid *, struct afs_status_cb *); -extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, struct afs_status_cb *, - struct afs_fid *, struct afs_status_cb *); -extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, - struct afs_status_cb *, struct afs_status_cb *); -extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, - struct afs_status_cb *); -extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, - struct afs_status_cb *, struct afs_status_cb *); -extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, - struct afs_status_cb *, struct afs_fid *, struct afs_status_cb *); -extern int yfs_fs_rename(struct afs_fs_cursor *, const char *, struct afs_vnode *, const char *, - struct afs_status_cb *, struct afs_status_cb *); -extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *, - pgoff_t, pgoff_t, unsigned, unsigned, struct afs_status_cb *); -extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *, struct afs_status_cb *); -extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *); -extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t, struct afs_status_cb *); -extern int yfs_fs_extend_lock(struct afs_fs_cursor *, struct afs_status_cb *); -extern int yfs_fs_release_lock(struct afs_fs_cursor *, struct afs_status_cb *); -extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_status_cb *, - struct afs_volsync *); -extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *, - struct afs_fid *, struct afs_status_cb *, - unsigned int, struct afs_volsync *); +extern void yfs_fs_fetch_data(struct afs_operation *); +extern void yfs_fs_create_file(struct afs_operation *); +extern void yfs_fs_make_dir(struct afs_operation *); +extern void yfs_fs_remove_file2(struct afs_operation *); +extern void yfs_fs_remove_file(struct afs_operation *); +extern void yfs_fs_remove_dir(struct afs_operation *); +extern void yfs_fs_link(struct afs_operation *); +extern void yfs_fs_symlink(struct afs_operation *); +extern void yfs_fs_rename(struct afs_operation *); +extern void yfs_fs_store_data(struct afs_operation *); +extern void yfs_fs_setattr(struct afs_operation *); +extern void yfs_fs_get_volume_status(struct afs_operation *); +extern void yfs_fs_set_lock(struct afs_operation *); +extern void yfs_fs_extend_lock(struct afs_operation *); +extern void yfs_fs_release_lock(struct afs_operation *); +extern void yfs_fs_fetch_status(struct afs_operation *); +extern void yfs_fs_inline_bulk_status(struct afs_operation *); struct yfs_acl { struct afs_acl *acl; /* Dir/file/symlink ACL */ @@ -1398,31 +1579,56 @@ struct yfs_acl { }; extern void yfs_free_opaque_acl(struct yfs_acl *); -extern struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *, struct yfs_acl *, - struct afs_status_cb *); -extern int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *, const struct afs_acl *, - struct afs_status_cb *); +extern void yfs_fs_fetch_opaque_acl(struct afs_operation *); +extern void yfs_fs_store_opaque_acl2(struct afs_operation *); /* * Miscellaneous inline functions. */ static inline struct afs_vnode *AFS_FS_I(struct inode *inode) { - return container_of(inode, struct afs_vnode, vfs_inode); + return container_of(inode, struct afs_vnode, netfs.inode); } static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) { - return &vnode->vfs_inode; + return &vnode->netfs.inode; } -static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc, - struct afs_vnode *vnode) +/* + * Note that a dentry got changed. We need to set d_fsdata to the data version + * number derived from the result of the operation. It doesn't matter if + * d_fsdata goes backwards as we'll just revalidate. + */ +static inline void afs_update_dentry_version(struct afs_operation *op, + struct afs_vnode_param *dir_vp, + struct dentry *dentry) { - if (fc->ac.error == -ENOENT) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); - afs_break_callback(vnode, afs_cb_break_for_deleted); - } + if (!op->error) + dentry->d_fsdata = + (void *)(unsigned long)dir_vp->scb.status.data_version; +} + +/* + * Set the file size and block count. Estimate the number of 512 bytes blocks + * used, rounded up to nearest 1K for consistency with other AFS clients. + */ +static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size) +{ + i_size_write(&vnode->netfs.inode, size); + vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1; +} + +/* + * Check for a conflicting operation on a directory that we just unlinked from. + * If someone managed to sneak a link or an unlink in on the file we just + * unlinked, we won't be able to trust nlink on an AFS file (but not YFS). + */ +static inline void afs_check_dir_conflict(struct afs_operation *op, + struct afs_vnode_param *dvp) +{ + if (dvp->dv_before + dvp->dv_delta != dvp->scb.status.data_version) + op->flags |= AFS_OPERATION_DIR_CONFLICT; } static inline int afs_io_error(struct afs_call *call, enum afs_io_error where) diff --git a/fs/afs/main.c b/fs/afs/main.c index c9c45d7078bd..eae288c8d40a 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -78,16 +78,18 @@ static int __net_init afs_net_init(struct net *net_ns) mutex_init(&net->socket_mutex); net->cells = RB_ROOT; - seqlock_init(&net->cells_lock); + init_rwsem(&net->cells_lock); INIT_WORK(&net->cells_manager, afs_manage_cells); timer_setup(&net->cells_timer, afs_cells_timer, 0); + mutex_init(&net->cells_alias_lock); mutex_init(&net->proc_cells_lock); INIT_HLIST_HEAD(&net->proc_cells); seqlock_init(&net->fs_lock); net->fs_servers = RB_ROOT; - INIT_LIST_HEAD(&net->fs_updates); + INIT_LIST_HEAD(&net->fs_probe_fast); + INIT_LIST_HEAD(&net->fs_probe_slow); INIT_HLIST_HEAD(&net->fs_proc); INIT_HLIST_HEAD(&net->fs_addresses4); @@ -96,6 +98,9 @@ static int __net_init afs_net_init(struct net *net_ns) INIT_WORK(&net->fs_manager, afs_manage_servers); timer_setup(&net->fs_timer, afs_servers_timer, 0); + INIT_WORK(&net->fs_prober, afs_fs_probe_dispatcher); + timer_setup(&net->fs_probe_timer, afs_fs_probe_timer, 0); + atomic_set(&net->servers_outstanding, 1); ret = -ENOMEM; sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL); @@ -126,6 +131,7 @@ static int __net_init afs_net_init(struct net *net_ns) error_open_socket: net->live = false; + afs_fs_probe_cleanup(net); afs_cell_purge(net); afs_purge_servers(net); error_cell_init: @@ -146,6 +152,7 @@ static void __net_exit afs_net_exit(struct net *net_ns) struct afs_net *net = afs_net(net_ns); net->live = false; + afs_fs_probe_cleanup(net); afs_cell_purge(net); afs_purge_servers(net); afs_close_socket(net); @@ -179,14 +186,7 @@ static int __init afs_init(void) if (!afs_lock_manager) goto error_lockmgr; -#ifdef CONFIG_AFS_FSCACHE - /* we want to be able to cache */ - ret = fscache_register_netfs(&afs_cache_netfs); - if (ret < 0) - goto error_cache; -#endif - - ret = register_pernet_subsys(&afs_net_ops); + ret = register_pernet_device(&afs_net_ops); if (ret < 0) goto error_net; @@ -196,8 +196,8 @@ static int __init afs_init(void) goto error_fs; afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); - if (IS_ERR(afs_proc_symlink)) { - ret = PTR_ERR(afs_proc_symlink); + if (!afs_proc_symlink) { + ret = -ENOMEM; goto error_proc; } @@ -206,12 +206,8 @@ static int __init afs_init(void) error_proc: afs_fs_exit(); error_fs: - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); error_net: -#ifdef CONFIG_AFS_FSCACHE - fscache_unregister_netfs(&afs_cache_netfs); -error_cache: -#endif destroy_workqueue(afs_lock_manager); error_lockmgr: destroy_workqueue(afs_async_calls); @@ -237,10 +233,7 @@ static void __exit afs_exit(void) proc_remove(afs_proc_symlink); afs_fs_exit(); - unregister_pernet_subsys(&afs_net_ops); -#ifdef CONFIG_AFS_FSCACHE - fscache_unregister_netfs(&afs_cache_netfs); -#endif + unregister_pernet_device(&afs_net_ops); destroy_workqueue(afs_lock_manager); destroy_workqueue(afs_async_calls); destroy_workqueue(afs_wq); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 52b19e9c1535..805328ca5428 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -69,6 +69,7 @@ int afs_abort_to_error(u32 abort_code) /* Unified AFS error table */ case UAEPERM: return -EPERM; case UAENOENT: return -ENOENT; + case UAEAGAIN: return -EAGAIN; case UAEACCES: return -EACCES; case UAEBUSY: return -EBUSY; case UAEEXIST: return -EEXIST; @@ -83,6 +84,7 @@ int afs_abort_to_error(u32 abort_code) case UAENOLCK: return -ENOLCK; case UAENOTEMPTY: return -ENOTEMPTY; case UAELOOP: return -ELOOP; + case UAEOVERFLOW: return -EOVERFLOW; case UAENOMEDIUM: return -ENOMEDIUM; case UAEDQUOT: return -EDQUOT; @@ -119,42 +121,42 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code) if (e->error == -ETIMEDOUT || e->error == -ETIME) return; - /* Fall through */ + fallthrough; case -ETIMEDOUT: case -ETIME: if (e->error == -ENOMEM || e->error == -ENONET) return; - /* Fall through */ + fallthrough; case -ENOMEM: case -ENONET: if (e->error == -ERFKILL) return; - /* Fall through */ + fallthrough; case -ERFKILL: if (e->error == -EADDRNOTAVAIL) return; - /* Fall through */ + fallthrough; case -EADDRNOTAVAIL: if (e->error == -ENETUNREACH) return; - /* Fall through */ + fallthrough; case -ENETUNREACH: if (e->error == -EHOSTUNREACH) return; - /* Fall through */ + fallthrough; case -EHOSTUNREACH: if (e->error == -EHOSTDOWN) return; - /* Fall through */ + fallthrough; case -EHOSTDOWN: if (e->error == -ECONNREFUSED) return; - /* Fall through */ + fallthrough; case -ECONNREFUSED: if (e->error == -ECONNRESET) return; - /* Fall through */ + fallthrough; case -ECONNRESET: /* Responded, but call expired. */ if (e->responded) return; @@ -162,8 +164,11 @@ void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code) return; case -ECONNABORTED: + error = afs_abort_to_error(abort_code); + fallthrough; + case -ENETRESET: /* Responded, but we seem to have changed address */ e->responded = true; - e->error = afs_abort_to_error(abort_code); + e->error = error; return; } } diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 79bc5f1338ed..97f50e9fd9eb 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .readlink = page_readlink, .getattr = afs_getattr, - .listxattr = afs_listxattr, }; const struct inode_operations afs_autocell_inode_operations = { @@ -88,7 +87,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) ctx->force = true; } if (ctx->cell) { - afs_put_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt); ctx->cell = NULL; } if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { @@ -124,7 +123,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) char *buf; if (src_as->cell) - ctx->cell = afs_get_cell(src_as->cell); + ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt); if (size < 2 || size > PAGE_SIZE - 1) return -EINVAL; @@ -133,12 +132,6 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) if (IS_ERR(page)) return PTR_ERR(page); - if (PageError(page)) { - ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); - put_page(page); - return ret; - } - buf = kmap(page); ret = -EINVAL; if (buf[size - 1] == '.') diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 468e1713bce1..2a0c83d71565 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -38,7 +38,7 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) if (v == SEQ_START_TOKEN) { /* display header on line 1 */ - seq_puts(m, "USE TTL SV NAME\n"); + seq_puts(m, "USE ACT TTL SV ST NAME\n"); return 0; } @@ -46,10 +46,12 @@ static int afs_proc_cells_show(struct seq_file *m, void *v) vllist = rcu_dereference(cell->vl_servers); /* display one cell per line on subsequent lines */ - seq_printf(m, "%3u %6lld %2u %s\n", - atomic_read(&cell->usage), + seq_printf(m, "%3u %3u %6lld %2u %2u %s\n", + refcount_read(&cell->ref), + atomic_read(&cell->active), cell->dns_expiry - ktime_get_real_seconds(), - vllist->nr_servers, + vllist ? vllist->nr_servers : 0, + cell->state, cell->name); return 0; } @@ -127,7 +129,7 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) } if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags)) - afs_put_cell(net, cell); + afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin); } else { goto inval; } @@ -153,13 +155,11 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v) struct afs_net *net; net = afs_seq2net_single(m); - if (rcu_access_pointer(net->ws_cell)) { - rcu_read_lock(); - cell = rcu_dereference(net->ws_cell); - if (cell) - seq_printf(m, "%s\n", cell->name); - rcu_read_unlock(); - } + down_read(&net->cells_lock); + cell = net->ws_cell; + if (cell) + seq_printf(m, "%s\n", cell->name); + up_read(&net->cells_lock); return 0; } @@ -208,17 +208,16 @@ static const char afs_vol_types[3][3] = { */ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link); + struct afs_volume *vol = hlist_entry(v, struct afs_volume, proc_link); /* Display header on line 1 */ - if (v == &cell->proc_volumes) { + if (v == SEQ_START_TOKEN) { seq_puts(m, "USE VID TY NAME\n"); return 0; } seq_printf(m, "%3d %08llx %s %s\n", - atomic_read(&vol->usage), vol->vid, + refcount_read(&vol->ref), vol->vid, afs_vol_types[vol->type], vol->name); @@ -228,26 +227,24 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); - read_lock(&cell->proc_lock); - return seq_list_start_head(&cell->proc_volumes, *_pos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos); } static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); - return seq_list_next(v, &cell->proc_volumes, _pos); + return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos); } static void afs_proc_cell_volumes_stop(struct seq_file *m, void *v) __releases(cell->proc_lock) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); - - read_unlock(&cell->proc_lock); + rcu_read_unlock(); } static const struct seq_operations afs_proc_cell_volumes_ops = { @@ -312,6 +309,11 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v) alist->preferred == i ? '>' : '-', &alist->addrs[i].transport); } + seq_printf(m, " info: fl=%lx rtt=%d\n", vlserver->flags, vlserver->rtt); + seq_printf(m, " probe: fl=%x e=%d ac=%d out=%d\n", + vlserver->probe.flags, vlserver->probe.error, + vlserver->probe.abort_code, + atomic_read(&vlserver->probe_outstanding)); return 0; } @@ -320,7 +322,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) { struct afs_vl_seq_net_private *priv = m->private; struct afs_vlserver_list *vllist; - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); @@ -378,20 +380,26 @@ static int afs_proc_servers_show(struct seq_file *m, void *v) int i; if (v == SEQ_START_TOKEN) { - seq_puts(m, "UUID USE ADDR\n"); + seq_puts(m, "UUID REF ACT\n"); return 0; } server = list_entry(v, struct afs_server, proc_link); alist = rcu_dereference(server->addresses); - seq_printf(m, "%pU %3d %pISpc%s\n", + seq_printf(m, "%pU %3d %3d\n", &server->uuid, - atomic_read(&server->usage), - &alist->addrs[0].transport, - alist->preferred == 0 ? "*" : ""); - for (i = 1; i < alist->nr_addrs; i++) - seq_printf(m, " %pISpc%s\n", - &alist->addrs[i].transport, + refcount_read(&server->ref), + atomic_read(&server->active)); + seq_printf(m, " - info: fl=%lx rtt=%u brk=%x\n", + server->flags, server->rtt, server->cb_s_break); + seq_printf(m, " - probe: last=%d out=%d\n", + (int)(jiffies - server->probed_at) / HZ, + atomic_read(&server->probe_outstanding)); + seq_printf(m, " - ALIST v=%u rsp=%lx f=%lx\n", + alist->version, alist->responded, alist->failed); + for (i = 0; i < alist->nr_addrs; i++) + seq_printf(m, " [%x] %pISpc%s\n", + i, &alist->addrs[i].transport, alist->preferred == i ? "*" : ""); return 0; } @@ -563,6 +571,7 @@ void afs_put_sysnames(struct afs_sysnames *sysnames) if (sysnames->subs[i] != afs_init_sysname && sysnames->subs[i] != sysnames->blank) kfree(sysnames->subs[i]); + kfree(sysnames); } } diff --git a/fs/afs/protocol_afs.h b/fs/afs/protocol_afs.h new file mode 100644 index 000000000000..0c39358c8b70 --- /dev/null +++ b/fs/afs/protocol_afs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AFS protocol bits + * + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + + +#define AFSCAPABILITIESMAX 196 /* Maximum number of words in a capability set */ + +/* AFS3 Fileserver capabilities word 0 */ +#define AFS3_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Uses UAE errors */ +#define AFS3_VICED_CAPABILITY_64BITFILES 0x0002 /* FetchData64 & StoreData64 supported */ +#define AFS3_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */ +#define AFS3_VICED_CAPABILITY_SANEACLS 0x0008 /* ACLs reviewed for sanity - don't use */ diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h index 32be9c698348..e4cd89c44c46 100644 --- a/fs/afs/protocol_yfs.h +++ b/fs/afs/protocol_yfs.h @@ -8,7 +8,7 @@ #define YFS_FS_SERVICE 2500 #define YFS_CM_SERVICE 2501 -#define YFSCBMAX 1024 +#define YFSCBMAX 1024 enum YFS_CM_Operations { YFSCBProbe = 206, /* probe client */ @@ -168,3 +168,9 @@ enum yfs_lock_type { yfs_LockMandatoryWrite = 0x101, yfs_LockMandatoryExtend = 0x102, }; + +/* RXYFS Viced Capability Flags */ +#define YFS_VICED_CAPABILITY_ERRORTRANS 0x0001 /* Deprecated v0.195 */ +#define YFS_VICED_CAPABILITY_64BITFILES 0x0002 /* Deprecated v0.195 */ +#define YFS_VICED_CAPABILITY_WRITELOCKACL 0x0004 /* Can lock a file even without lock perm */ +#define YFS_VICED_CAPABILITY_SANEACLS 0x0008 /* Deprecated v0.195 */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 172ba569cd60..a840c3588ebb 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -15,60 +15,32 @@ #include "afs_fs.h" /* - * Begin an operation on the fileserver. - * - * Fileserver operations are serialised on the server by vnode, so we serialise - * them here also using the io_lock. - */ -bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - struct key *key, bool intr) -{ - memset(fc, 0, sizeof(*fc)); - fc->vnode = vnode; - fc->key = key; - fc->ac.error = SHRT_MAX; - fc->error = -EDESTADDRREQ; - - if (intr) { - fc->flags |= AFS_FS_CURSOR_INTR; - if (mutex_lock_interruptible(&vnode->io_lock) < 0) { - fc->error = -EINTR; - fc->flags |= AFS_FS_CURSOR_STOP; - return false; - } - } else { - mutex_lock(&vnode->io_lock); - } - - if (vnode->lock_state != AFS_VNODE_LOCK_NONE) - fc->flags |= AFS_FS_CURSOR_CUR_ONLY; - return true; -} - -/* * Begin iteration through a server list, starting with the vnode's last used * server if possible, or the last recorded good server if not. */ -static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, +static bool afs_start_fs_iteration(struct afs_operation *op, struct afs_vnode *vnode) { - struct afs_cb_interest *cbi; + struct afs_server *server; + void *cb_server; int i; - read_lock(&vnode->volume->servers_lock); - fc->server_list = afs_get_serverlist(vnode->volume->servers); - read_unlock(&vnode->volume->servers_lock); + read_lock(&op->volume->servers_lock); + op->server_list = afs_get_serverlist( + rcu_dereference_protected(op->volume->servers, + lockdep_is_held(&op->volume->servers_lock))); + read_unlock(&op->volume->servers_lock); - fc->untried = (1UL << fc->server_list->nr_servers) - 1; - fc->index = READ_ONCE(fc->server_list->preferred); + op->untried = (1UL << op->server_list->nr_servers) - 1; + op->index = READ_ONCE(op->server_list->preferred); - cbi = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->io_lock)); - if (cbi) { + cb_server = vnode->cb_server; + if (cb_server) { /* See if the vnode's preferred record is still available */ - for (i = 0; i < fc->server_list->nr_servers; i++) { - if (fc->server_list->servers[i].cb_interest == cbi) { - fc->index = i; + for (i = 0; i < op->server_list->nr_servers; i++) { + server = op->server_list->servers[i].server; + if (server == cb_server) { + op->index = i; goto found_interest; } } @@ -77,21 +49,18 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc, * serving this vnode, then we can't switch to another server * and have to return an error. */ - if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->error = -ESTALE; + if (op->flags & AFS_OPERATION_CUR_ONLY) { + op->error = -ESTALE; return false; } /* Note that the callback promise is effectively broken */ write_seqlock(&vnode->cb_lock); - ASSERTCMP(cbi, ==, rcu_access_pointer(vnode->cb_interest)); - rcu_assign_pointer(vnode->cb_interest, NULL); + ASSERTCMP(cb_server, ==, vnode->cb_server); + vnode->cb_server = NULL; if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) vnode->cb_break++; write_sequnlock(&vnode->cb_lock); - - afs_put_cb_interest(afs_v2net(vnode), cbi); - cbi = NULL; } found_interest: @@ -118,12 +87,12 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code) /* * Sleep and retry the operation to the same fileserver. */ -static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) +static bool afs_sleep_and_retry(struct afs_operation *op) { - if (fc->flags & AFS_FS_CURSOR_INTR) { + if (!(op->flags & AFS_OPERATION_UNINTR)) { msleep_interruptible(1000); if (signal_pending(current)) { - fc->error = -ERESTARTSYS; + op->error = -ERESTARTSYS; return false; } } else { @@ -137,26 +106,26 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc) * Select the fileserver to use. May be called multiple times to rotate * through the fileservers. */ -bool afs_select_fileserver(struct afs_fs_cursor *fc) +bool afs_select_fileserver(struct afs_operation *op) { struct afs_addr_list *alist; struct afs_server *server; - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode *vnode = op->file[0].vnode; struct afs_error e; u32 rtt; - int error = fc->ac.error, i; + int error = op->ac.error, i; _enter("%lx[%d],%lx[%d],%d,%d", - fc->untried, fc->index, - fc->ac.tried, fc->ac.index, - error, fc->ac.abort_code); + op->untried, op->index, + op->ac.tried, op->ac.index, + error, op->ac.abort_code); - if (fc->flags & AFS_FS_CURSOR_STOP) { + if (op->flags & AFS_OPERATION_STOP) { _leave(" = f [stopped]"); return false; } - fc->nr_iterations++; + op->nr_iterations++; /* Evaluate the result of the previous operation, if there was one. */ switch (error) { @@ -166,8 +135,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case 0: default: /* Success or local failure. Stop. */ - fc->error = error; - fc->flags |= AFS_FS_CURSOR_STOP; + op->error = error; + op->flags |= AFS_OPERATION_STOP; _leave(" = f [okay/local %d]", error); return false; @@ -175,42 +144,42 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) /* The far side rejected the operation on some grounds. This * might involve the server being busy or the volume having been moved. */ - switch (fc->ac.abort_code) { + switch (op->ac.abort_code) { case VNOVOL: /* This fileserver doesn't know about the volume. * - May indicate that the VL is wrong - retry once and compare * the results. * - May indicate that the fileserver couldn't attach to the vol. */ - if (fc->flags & AFS_FS_CURSOR_VNOVOL) { - fc->error = -EREMOTEIO; + if (op->flags & AFS_OPERATION_VNOVOL) { + op->error = -EREMOTEIO; goto next_server; } - write_lock(&vnode->volume->servers_lock); - fc->server_list->vnovol_mask |= 1 << fc->index; - write_unlock(&vnode->volume->servers_lock); + write_lock(&op->volume->servers_lock); + op->server_list->vnovol_mask |= 1 << op->index; + write_unlock(&op->volume->servers_lock); - set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - error = afs_check_volume_status(vnode->volume, fc->key); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); + error = afs_check_volume_status(op->volume, op); if (error < 0) goto failed_set_error; - if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) { - fc->error = -ENOMEDIUM; + if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) { + op->error = -ENOMEDIUM; goto failed; } /* If the server list didn't change, then assume that * it's the fileserver having trouble. */ - if (vnode->volume->servers == fc->server_list) { - fc->error = -EREMOTEIO; + if (rcu_access_pointer(op->volume->servers) == op->server_list) { + op->error = -EREMOTEIO; goto next_server; } /* Try again */ - fc->flags |= AFS_FS_CURSOR_VNOVOL; + op->flags |= AFS_OPERATION_VNOVOL; _leave(" = t [vnovol]"); return true; @@ -220,20 +189,20 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case VONLINE: case VDISKFULL: case VOVERQUOTA: - fc->error = afs_abort_to_error(fc->ac.abort_code); + op->error = afs_abort_to_error(op->ac.abort_code); goto next_server; case VOFFLINE: - if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) { - afs_busy(vnode->volume, fc->ac.abort_code); - clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); + if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) { + afs_busy(op->volume, op->ac.abort_code); + clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); } - if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->error = -EADV; + if (op->flags & AFS_OPERATION_NO_VSLEEP) { + op->error = -EADV; goto failed; } - if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - fc->error = -ESTALE; + if (op->flags & AFS_OPERATION_CUR_ONLY) { + op->error = -ESTALE; goto failed; } goto busy; @@ -244,17 +213,17 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) /* Retry after going round all the servers unless we * have a file lock we need to maintain. */ - if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) { - fc->error = -EBUSY; + if (op->flags & AFS_OPERATION_NO_VSLEEP) { + op->error = -EBUSY; goto failed; } - if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) { - afs_busy(vnode->volume, fc->ac.abort_code); - clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); + if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) { + afs_busy(op->volume, op->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); } busy: - if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) { - if (!afs_sleep_and_retry(fc)) + if (op->flags & AFS_OPERATION_CUR_ONLY) { + if (!afs_sleep_and_retry(op)) goto failed; /* Retry with same server & address */ @@ -262,7 +231,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) return true; } - fc->flags |= AFS_FS_CURSOR_VBUSY; + op->flags |= AFS_OPERATION_VBUSY; goto next_server; case VMOVED: @@ -273,15 +242,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * We also limit the number of VMOVED hops we will * honour, just in case someone sets up a loop. */ - if (fc->flags & AFS_FS_CURSOR_VMOVED) { - fc->error = -EREMOTEIO; + if (op->flags & AFS_OPERATION_VMOVED) { + op->error = -EREMOTEIO; goto failed; } - fc->flags |= AFS_FS_CURSOR_VMOVED; + op->flags |= AFS_OPERATION_VMOVED; - set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags); - set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags); - error = afs_check_volume_status(vnode->volume, fc->key); + set_bit(AFS_VOLUME_WAIT, &op->volume->flags); + set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags); + error = afs_check_volume_status(op->volume, op); if (error < 0) goto failed_set_error; @@ -294,25 +263,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) * * TODO: Retry a few times with sleeps. */ - if (vnode->volume->servers == fc->server_list) { - fc->error = -ENOMEDIUM; + if (rcu_access_pointer(op->volume->servers) == op->server_list) { + op->error = -ENOMEDIUM; goto failed; } goto restart_from_beginning; default: - clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags); - clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags); - fc->error = afs_abort_to_error(fc->ac.abort_code); + clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags); + clear_bit(AFS_VOLUME_BUSY, &op->volume->flags); + op->error = afs_abort_to_error(op->ac.abort_code); goto failed; } case -ETIMEDOUT: case -ETIME: - if (fc->error != -EDESTADDRREQ) + if (op->error != -EDESTADDRREQ) goto iterate_address; - /* Fall through */ + fallthrough; case -ERFKILL: case -EADDRNOTAVAIL: case -ENETUNREACH: @@ -320,103 +289,99 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) case -EHOSTDOWN: case -ECONNREFUSED: _debug("no conn"); - fc->error = error; + op->error = error; goto iterate_address; + case -ENETRESET: + pr_warn("kAFS: Peer reset %s (op=%x)\n", + op->type ? op->type->name : "???", op->debug_id); + fallthrough; case -ECONNRESET: _debug("call reset"); - fc->error = error; + op->error = error; goto failed; } restart_from_beginning: _debug("restart"); - afs_end_cursor(&fc->ac); - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - afs_put_serverlist(afs_v2net(vnode), fc->server_list); - fc->server_list = NULL; + afs_end_cursor(&op->ac); + op->server = NULL; + afs_put_serverlist(op->net, op->server_list); + op->server_list = NULL; start: _debug("start"); /* See if we need to do an update of the volume record. Note that the * volume may have moved or even have been deleted. */ - error = afs_check_volume_status(vnode->volume, fc->key); + error = afs_check_volume_status(op->volume, op); if (error < 0) goto failed_set_error; - if (!afs_start_fs_iteration(fc, vnode)) + if (!afs_start_fs_iteration(op, vnode)) goto failed; - _debug("__ VOL %llx __", vnode->volume->vid); - error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list); - if (error < 0) - goto failed_set_error; + _debug("__ VOL %llx __", op->volume->vid); pick_server: - _debug("pick [%lx]", fc->untried); + _debug("pick [%lx]", op->untried); - error = afs_wait_for_fs_probes(fc->server_list, fc->untried); + error = afs_wait_for_fs_probes(op->server_list, op->untried); if (error < 0) goto failed_set_error; /* Pick the untried server with the lowest RTT. If we have outstanding * callbacks, we stick with the server we're already using if we can. */ - if (fc->cbi) { - _debug("cbi %u", fc->index); - if (test_bit(fc->index, &fc->untried)) + if (op->server) { + _debug("server %u", op->index); + if (test_bit(op->index, &op->untried)) goto selected_server; - afs_put_cb_interest(afs_v2net(vnode), fc->cbi); - fc->cbi = NULL; - _debug("nocbi"); + op->server = NULL; + _debug("no server"); } - fc->index = -1; + op->index = -1; rtt = U32_MAX; - for (i = 0; i < fc->server_list->nr_servers; i++) { - struct afs_server *s = fc->server_list->servers[i].server; + for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_server *s = op->server_list->servers[i].server; - if (!test_bit(i, &fc->untried) || !s->probe.responded) + if (!test_bit(i, &op->untried) || + !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags)) continue; if (s->probe.rtt < rtt) { - fc->index = i; + op->index = i; rtt = s->probe.rtt; } } - if (fc->index == -1) + if (op->index == -1) goto no_more_servers; selected_server: - _debug("use %d", fc->index); - __clear_bit(fc->index, &fc->untried); + _debug("use %d", op->index); + __clear_bit(op->index, &op->untried); /* We're starting on a different fileserver from the list. We need to * check it, create a callback intercept, find its address list and * probe its capabilities before we use it. */ - ASSERTCMP(fc->ac.alist, ==, NULL); - server = fc->server_list->servers[fc->index].server; + ASSERTCMP(op->ac.alist, ==, NULL); + server = op->server_list->servers[op->index].server; - if (!afs_check_server_record(fc, server)) + if (!afs_check_server_record(op, server)) goto failed; _debug("USING SERVER: %pU", &server->uuid); - /* Make sure we've got a callback interest record for this server. We - * have to link it in before we send the request as we can be sent a - * break request before we've finished decoding the reply and - * installing the vnode. - */ - error = afs_register_server_cb_interest(vnode, fc->server_list, - fc->index); - if (error < 0) - goto failed_set_error; - - fc->cbi = afs_get_cb_interest( - rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->io_lock))); + op->flags |= AFS_OPERATION_RETRY_SERVER; + op->server = server; + if (vnode->cb_server != server) { + vnode->cb_server = server; + vnode->cb_s_break = server->cb_s_break; + vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break); + vnode->cb_v_break = vnode->volume->cb_v_break; + clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); + } read_lock(&server->fs_lock); alist = rcu_dereference_protected(server->addresses, @@ -424,44 +389,68 @@ selected_server: afs_get_addrlist(alist); read_unlock(&server->fs_lock); - memset(&fc->ac, 0, sizeof(fc->ac)); +retry_server: + memset(&op->ac, 0, sizeof(op->ac)); - if (!fc->ac.alist) - fc->ac.alist = alist; + if (!op->ac.alist) + op->ac.alist = alist; else afs_put_addrlist(alist); - fc->ac.index = -1; + op->ac.index = -1; iterate_address: - ASSERT(fc->ac.alist); + ASSERT(op->ac.alist); /* Iterate over the current server's address list to try and find an * address on which it will respond to us. */ - if (!afs_iterate_addresses(&fc->ac)) - goto next_server; + if (!afs_iterate_addresses(&op->ac)) + goto out_of_addresses; - _debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs); + _debug("address [%u] %u/%u %pISp", + op->index, op->ac.index, op->ac.alist->nr_addrs, + &op->ac.alist->addrs[op->ac.index].transport); _leave(" = t"); return true; +out_of_addresses: + /* We've now had a failure to respond on all of a server's addresses - + * immediately probe them again and consider retrying the server. + */ + afs_probe_fileserver(op->net, op->server); + if (op->flags & AFS_OPERATION_RETRY_SERVER) { + alist = op->ac.alist; + error = afs_wait_for_one_fs_probe( + op->server, !(op->flags & AFS_OPERATION_UNINTR)); + switch (error) { + case 0: + op->flags &= ~AFS_OPERATION_RETRY_SERVER; + goto retry_server; + case -ERESTARTSYS: + goto failed_set_error; + case -ETIME: + case -EDESTADDRREQ: + goto next_server; + } + } + next_server: _debug("next"); - afs_end_cursor(&fc->ac); + afs_end_cursor(&op->ac); goto pick_server; no_more_servers: /* That's all the servers poked to no good effect. Try again if some * of them were busy. */ - if (fc->flags & AFS_FS_CURSOR_VBUSY) + if (op->flags & AFS_OPERATION_VBUSY) goto restart_from_beginning; e.error = -EDESTADDRREQ; e.responded = false; - for (i = 0; i < fc->server_list->nr_servers; i++) { - struct afs_server *s = fc->server_list->servers[i].server; + for (i = 0; i < op->server_list->nr_servers; i++) { + struct afs_server *s = op->server_list->servers[i].server; afs_prioritise_error(&e, READ_ONCE(s->probe.error), s->probe.abort_code); @@ -470,101 +459,18 @@ no_more_servers: error = e.error; failed_set_error: - fc->error = error; + op->error = error; failed: - fc->flags |= AFS_FS_CURSOR_STOP; - afs_end_cursor(&fc->ac); - _leave(" = f [failed %d]", fc->error); - return false; -} - -/* - * Select the same fileserver we used for a vnode before and only that - * fileserver. We use this when we have a lock on that file, which is backed - * only by the fileserver we obtained it from. - */ -bool afs_select_current_fileserver(struct afs_fs_cursor *fc) -{ - struct afs_vnode *vnode = fc->vnode; - struct afs_cb_interest *cbi; - struct afs_addr_list *alist; - int error = fc->ac.error; - - _enter(""); - - cbi = rcu_dereference_protected(vnode->cb_interest, - lockdep_is_held(&vnode->io_lock)); - - switch (error) { - case SHRT_MAX: - if (!cbi) { - fc->error = -ESTALE; - fc->flags |= AFS_FS_CURSOR_STOP; - return false; - } - - fc->cbi = afs_get_cb_interest(cbi); - - read_lock(&cbi->server->fs_lock); - alist = rcu_dereference_protected(cbi->server->addresses, - lockdep_is_held(&cbi->server->fs_lock)); - afs_get_addrlist(alist); - read_unlock(&cbi->server->fs_lock); - if (!alist) { - fc->error = -ESTALE; - fc->flags |= AFS_FS_CURSOR_STOP; - return false; - } - - memset(&fc->ac, 0, sizeof(fc->ac)); - fc->ac.alist = alist; - fc->ac.index = -1; - goto iterate_address; - - case 0: - default: - /* Success or local failure. Stop. */ - fc->error = error; - fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [okay/local %d]", error); - return false; - - case -ECONNABORTED: - fc->error = afs_abort_to_error(fc->ac.abort_code); - fc->flags |= AFS_FS_CURSOR_STOP; - _leave(" = f [abort]"); - return false; - - case -ERFKILL: - case -EADDRNOTAVAIL: - case -ENETUNREACH: - case -EHOSTUNREACH: - case -EHOSTDOWN: - case -ECONNREFUSED: - case -ETIMEDOUT: - case -ETIME: - _debug("no conn"); - fc->error = error; - goto iterate_address; - } - -iterate_address: - /* Iterate over the current server's address list to try and find an - * address on which it will respond to us. - */ - if (afs_iterate_addresses(&fc->ac)) { - _leave(" = t"); - return true; - } - - afs_end_cursor(&fc->ac); + op->flags |= AFS_OPERATION_STOP; + afs_end_cursor(&op->ac); + _leave(" = f [failed %d]", op->error); return false; } /* * Dump cursor state in the case of the error being EDESTADDRREQ. */ -static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) +void afs_dump_edestaddrreq(const struct afs_operation *op) { static int count; int i; @@ -576,13 +482,14 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) rcu_read_lock(); pr_notice("EDESTADDR occurred\n"); - pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n", - fc->cb_break, fc->cb_break_2, fc->flags, fc->error); + pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n", + op->file[0].cb_break_before, + op->file[1].cb_break_before, op->flags, op->error); pr_notice("FC: ut=%lx ix=%d ni=%u\n", - fc->untried, fc->index, fc->nr_iterations); + op->untried, op->index, op->nr_iterations); - if (fc->server_list) { - const struct afs_server_list *sl = fc->server_list; + if (op->server_list) { + const struct afs_server_list *sl = op->server_list; pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n", sl->nr_servers, sl->preferred, sl->vnovol_mask); for (i = 0; i < sl->nr_servers; i++) { @@ -596,41 +503,16 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc) a->version, a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); - pr_notice("FC: - pr=%lx R=%lx F=%lx\n", - a->probed, a->responded, a->failed); - if (a == fc->ac.alist) + pr_notice("FC: - R=%lx F=%lx\n", + a->responded, a->failed); + if (a == op->ac.alist) pr_notice("FC: - current\n"); } } } pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n", - fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error, - fc->ac.responded, fc->ac.nr_iterations); + op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error, + op->ac.responded, op->ac.nr_iterations); rcu_read_unlock(); } - -/* - * Tidy up a filesystem cursor and unlock the vnode. - */ -int afs_end_vnode_operation(struct afs_fs_cursor *fc) -{ - struct afs_net *net = afs_v2net(fc->vnode); - - if (fc->error == -EDESTADDRREQ || - fc->error == -EADDRNOTAVAIL || - fc->error == -ENETUNREACH || - fc->error == -EHOSTUNREACH) - afs_dump_edestaddrreq(fc); - - mutex_unlock(&fc->vnode->io_lock); - - afs_end_cursor(&fc->ac); - afs_put_cb_interest(net, fc->cbi); - afs_put_serverlist(net, fc->server_list); - - if (fc->error == -ECONNABORTED) - fc->error = afs_abort_to_error(fc->ac.abort_code); - - return fc->error; -} diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 58d396592250..eccc3cd0cb70 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); -static void afs_delete_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); @@ -38,7 +37,6 @@ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; - unsigned int min_level; int ret; _enter(""); @@ -58,9 +56,8 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); - min_level = RXRPC_SECURITY_ENCRYPT; - ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, - (void *)&min_level, sizeof(min_level)); + ret = rxrpc_sock_set_min_security_level(socket->sk, + RXRPC_SECURITY_ENCRYPT); if (ret < 0) goto error_2; @@ -148,14 +145,14 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, call->type = type; call->net = net; call->debug_id = atomic_inc_return(&rxrpc_debug_id); - atomic_set(&call->usage, 1); + refcount_set(&call->ref, 1); INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); spin_lock_init(&call->state_lock); call->iter = &call->def_iter; o = atomic_inc_return(&net->nr_outstanding_calls); - trace_afs_call(call, afs_call_trace_alloc, 1, o, + trace_afs_call(call->debug_id, afs_call_trace_alloc, 1, o, __builtin_return_address(0)); return call; } @@ -166,14 +163,16 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, void afs_put_call(struct afs_call *call) { struct afs_net *net = call->net; - int n = atomic_dec_return(&call->usage); - int o = atomic_read(&net->nr_outstanding_calls); + unsigned int debug_id = call->debug_id; + bool zero; + int r, o; - trace_afs_call(call, afs_call_trace_put, n + 1, o, + zero = __refcount_dec_and_test(&call->ref, &r); + o = atomic_read(&net->nr_outstanding_calls); + trace_afs_call(debug_id, afs_call_trace_put, r - 1, o, __builtin_return_address(0)); - ASSERTCMP(n, >=, 0); - if (n == 0) { + if (zero) { ASSERT(!work_pending(&call->async_work)); ASSERT(call->type->name != NULL); @@ -184,12 +183,11 @@ void afs_put_call(struct afs_call *call) if (call->type->destructor) call->type->destructor(call); - afs_put_server(call->net, call->server, afs_server_trace_put_call); - afs_put_cb_interest(call->net, call->cbi); + afs_unuse_server_notime(call->net, call->server, afs_server_trace_put_call); afs_put_addrlist(call->alist); kfree(call->request); - trace_afs_call(call, afs_call_trace_free, 0, o, + trace_afs_call(call->debug_id, afs_call_trace_free, 0, o, __builtin_return_address(0)); kfree(call); @@ -202,9 +200,11 @@ void afs_put_call(struct afs_call *call) static struct afs_call *afs_get_call(struct afs_call *call, enum afs_call_trace why) { - int u = atomic_inc_return(&call->usage); + int r; - trace_afs_call(call, why, u, + __refcount_inc(&call->ref, &r); + + trace_afs_call(call->debug_id, why, r + 1, atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); return call; @@ -275,39 +275,6 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } -#define AFS_BVEC_MAX 8 - -/* - * Load the given bvec with the next few pages. - */ -static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, - struct bio_vec *bv, pgoff_t first, pgoff_t last, - unsigned offset) -{ - struct page *pages[AFS_BVEC_MAX]; - unsigned int nr, n, i, to, bytes = 0; - - nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); - n = find_get_pages_contig(call->mapping, first, nr, pages); - ASSERTCMP(n, ==, nr); - - msg->msg_flags |= MSG_MORE; - for (i = 0; i < nr; i++) { - to = PAGE_SIZE; - if (first + i >= last) { - to = call->last_to; - msg->msg_flags &= ~MSG_MORE; - } - bv[i].bv_page = pages[i]; - bv[i].bv_len = to - offset; - bv[i].bv_offset = offset; - bytes += to - offset; - offset = 0; - } - - iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes); -} - /* * Advance the AFS call state when the RxRPC call ends the transmit phase. */ @@ -321,41 +288,6 @@ static void afs_notify_end_request_tx(struct sock *sock, } /* - * attach the data from a bunch of pages on an inode to a call - */ -static int afs_send_pages(struct afs_call *call, struct msghdr *msg) -{ - struct bio_vec bv[AFS_BVEC_MAX]; - unsigned int bytes, nr, loop, offset; - pgoff_t first = call->first, last = call->last; - int ret; - - offset = call->first_offset; - call->first_offset = 0; - - do { - afs_load_bvec(call, msg, bv, first, last, offset); - trace_afs_send_pages(call, msg, first, last, offset); - - offset = 0; - bytes = msg->msg_iter.count; - nr = msg->msg_iter.nr_segs; - - ret = rxrpc_kernel_send_data(call->net->socket, call->rxcall, msg, - bytes, afs_notify_end_request_tx); - for (loop = 0; loop < nr; loop++) - put_page(bv[loop].bv_page); - if (ret < 0) - break; - - first += nr; - } while (first <= last); - - trace_afs_sent_pages(call, call->first, last, first, ret); - return ret; -} - -/* * Initiate a call and synchronously queue up the parameters for dispatch. Any * error is stored into the call struct, which the caller must check for. */ @@ -365,6 +297,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t len; s64 tx_total_len; int ret; @@ -385,25 +318,16 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) * after the initial fixed part. */ tx_total_len = call->request_size; - if (call->send_pages) { - if (call->last == call->first) { - tx_total_len += call->last_to - call->first_offset; - } else { - /* It looks mathematically like you should be able to - * combine the following lines with the ones above, but - * unsigned arithmetic is fun when it wraps... - */ - tx_total_len += PAGE_SIZE - call->first_offset; - tx_total_len += call->last_to; - tx_total_len += (call->last - call->first - 1) * PAGE_SIZE; - } - } + if (call->write_iter) + tx_total_len += iov_iter_count(call->write_iter); /* If the call is going to be asynchronous, we need an extra ref for * the call to hold itself so the caller need not hang on to its ref. */ - if (call->async) + if (call->async) { afs_get_call(call, afs_call_trace_get); + call->drop_ref = true; + } /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, @@ -413,7 +337,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, - call->intr, + (call->intr ? RXRPC_PREINTERRUPTIBLE : + RXRPC_UNINTERRUPTIBLE), call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -426,6 +351,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) if (call->max_lifespan) rxrpc_kernel_set_max_life(call->net->socket, rxcall, call->max_lifespan); + call->issue_time = ktime_get_real(); /* send the request */ iov[0].iov_base = call->request; @@ -436,7 +362,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size); msg.msg_control = NULL; msg.msg_controllen = 0; - msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0); + msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0); ret = rxrpc_kernel_send_data(call->net->socket, rxcall, &msg, call->request_size, @@ -444,8 +370,18 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) if (ret < 0) goto error_do_abort; - if (call->send_pages) { - ret = afs_send_pages(call, &msg); + if (call->write_iter) { + msg.msg_iter = *call->write_iter; + msg.msg_flags &= ~MSG_MORE; + trace_afs_send_data(call, &msg); + + ret = rxrpc_kernel_send_data(call->net->socket, + call->rxcall, &msg, + iov_iter_count(&msg.msg_iter), + afs_notify_end_request_tx); + *call->write_iter = msg.msg_iter; + + trace_afs_sent_data(call, &msg, ret); if (ret < 0) goto error_do_abort; } @@ -463,9 +399,10 @@ error_do_abort: rxrpc_kernel_abort_call(call->net->socket, rxcall, RX_USER_ABORT, ret, "KSD"); } else { + len = 0; iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0); rxrpc_kernel_recv_data(call->net->socket, rxcall, - &msg.msg_iter, false, + &msg.msg_iter, &len, false, &call->abort_code, &call->service_id); ac->abort_code = call->abort_code; ac->responded = true; @@ -496,11 +433,45 @@ error_kill_call: } /* + * Log remote abort codes that indicate that we have a protocol disagreement + * with the server. + */ +static void afs_log_error(struct afs_call *call, s32 remote_abort) +{ + static int max = 0; + const char *msg; + int m; + + switch (remote_abort) { + case RX_EOF: msg = "unexpected EOF"; break; + case RXGEN_CC_MARSHAL: msg = "client marshalling"; break; + case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break; + case RXGEN_SS_MARSHAL: msg = "server marshalling"; break; + case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break; + case RXGEN_DECODE: msg = "opcode decode"; break; + case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break; + case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break; + case -32: msg = "insufficient data"; break; + default: + return; + } + + m = max; + if (m < 3) { + max = m + 1; + pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n", + msg, call->type->name, + &call->alist->addrs[call->addr_ix].transport); + } +} + +/* * deliver messages to a call */ static void afs_deliver_to_call(struct afs_call *call) { enum afs_call_state state; + size_t len; u32 abort_code, remote_abort = 0; int ret; @@ -513,10 +484,11 @@ static void afs_deliver_to_call(struct afs_call *call) state == AFS_CALL_SV_AWAIT_ACK ) { if (state == AFS_CALL_SV_AWAIT_ACK) { + len = 0; iov_iter_kvec(&call->def_iter, READ, NULL, 0, 0); ret = rxrpc_kernel_recv_data(call->net->socket, call->rxcall, &call->def_iter, - false, &remote_abort, + &len, false, &remote_abort, &call->service_id); trace_afs_receive_data(call, &call->def_iter, false, ret); @@ -530,21 +502,17 @@ static void afs_deliver_to_call(struct afs_call *call) return; } - if (!call->have_reply_time && - rxrpc_kernel_get_reply_time(call->net->socket, - call->rxcall, - &call->reply_time)) - call->have_reply_time = true; - ret = call->type->deliver(call); state = READ_ONCE(call->state); + if (ret == 0 && call->unmarshalling_error) + ret = -EBADMSG; switch (ret) { case 0: afs_queue_call_work(call); if (state == AFS_CALL_CL_PROC_REPLY) { - if (call->cbi) + if (call->op) set_bit(AFS_SERVER_FL_MAY_HAVE_CB, - &call->cbi->server->flags); + &call->op->server->flags); goto call_complete; } ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); @@ -554,6 +522,7 @@ static void afs_deliver_to_call(struct afs_call *call) goto out; case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); + afs_log_error(call, call->abort_code); goto done; case -ENOTSUPP: abort_code = RXGEN_OPCODE; @@ -563,10 +532,12 @@ static void afs_deliver_to_call(struct afs_call *call) case -EIO: pr_err("kAFS: Call %u in bad state %u\n", call->debug_id, state); - /* Fall through */ + fallthrough; case -ENODATA: case -EBADMSG: case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: abort_code = RXGEN_CC_UNMARSHAL; if (state != AFS_CALL_CL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; @@ -574,7 +545,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, ret, "KUM"); goto local_abort; default: - abort_code = RX_USER_ABORT; + abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, abort_code, ret, "KER"); goto local_abort; @@ -584,8 +555,6 @@ static void afs_deliver_to_call(struct afs_call *call) done: if (call->type->done) call->type->done(call); - if (state == AFS_CALL_COMPLETE && call->incoming) - afs_put_call(call); out: _leave(""); return; @@ -604,11 +573,7 @@ call_complete: long afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor *ac) { - signed long rtt2, timeout; long ret; - bool stalled = false; - u64 rtt; - u32 life, last_life; bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -619,14 +584,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call, if (ret < 0) goto out; - rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; - - timeout = rtt2; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); - add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -637,37 +594,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call, call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); - timeout = rtt2; continue; } if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) { /* rxrpc terminated the call. */ rxrpc_complete = true; break; } - if (call->intr && timeout == 0 && - life == last_life && signal_pending(current)) { - if (stalled) - break; - __set_current_state(TASK_RUNNING); - rxrpc_kernel_probe_life(call->net->socket, call->rxcall); - timeout = rtt2; - stalled = true; - continue; - } - - if (life != last_life) { - timeout = rtt2; - last_life = life; - stalled = false; - } - - timeout = schedule_timeout(timeout); + schedule(); } remove_wait_queue(&call->waitq, &myself); @@ -696,7 +635,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call, ret = call->ret0; call->ret0 = 0; - /* Fall through */ + fallthrough; case -ECONNABORTED: ac->responded = true; break; @@ -728,14 +667,13 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, unsigned long call_user_ID) { struct afs_call *call = (struct afs_call *)call_user_ID; - int u; + int r; trace_afs_notify_call(rxcall, call); call->need_attention = true; - u = atomic_fetch_add_unless(&call->usage, 1, 0); - if (u != 0) { - trace_afs_call(call, afs_call_trace_wake, u, + if (__refcount_inc_not_zero(&call->ref, &r)) { + trace_afs_call(call->debug_id, afs_call_trace_wake, r + 1, atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); @@ -745,21 +683,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, } /* - * Delete an asynchronous call. The work item carries a ref to the call struct - * that we need to release. - */ -static void afs_delete_async_call(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - _enter(""); - - afs_put_call(call); - - _leave(""); -} - -/* * Perform I/O processing on an asynchronous call. The work item carries a ref * to the call struct that we either need to release or to pass on. */ @@ -774,16 +697,6 @@ static void afs_process_async_call(struct work_struct *work) afs_deliver_to_call(call); } - if (call->state == AFS_CALL_COMPLETE) { - /* We have two refs to release - one from the alloc and one - * queued with the work item - and we can't just deallocate the - * call because the work item may be queued again. - */ - call->async_work.func = afs_delete_async_call; - if (!queue_work(afs_async_calls, &call->async_work)) - afs_put_call(call); - } - afs_put_call(call); _leave(""); } @@ -810,6 +723,7 @@ void afs_charge_preallocation(struct work_struct *work) if (!call) break; + call->drop_ref = true; call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); @@ -922,8 +836,8 @@ void afs_send_empty_reply(struct afs_call *call) case -ENOMEM: _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); - /* Fall through */ + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); + fallthrough; default: _leave(" [error]"); return; @@ -964,7 +878,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); rxrpc_kernel_abort_call(net->socket, call->rxcall, - RX_USER_ABORT, -ENOMEM, "KOO"); + RXGEN_SS_MARSHAL, -ENOMEM, "KOO"); } _leave(" [error]"); } @@ -980,10 +894,11 @@ int afs_extract_data(struct afs_call *call, bool want_more) u32 remote_abort = 0; int ret; - _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more); + _enter("{%s,%zu,%zu},%d", + call->type->name, call->iov_len, iov_iter_count(iter), want_more); ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter, - want_more, &remote_abort, + &call->iov_len, want_more, &remote_abort, &call->service_id); if (ret == 0 || ret == -EAGAIN) return ret; @@ -1013,9 +928,11 @@ int afs_extract_data(struct afs_call *call, bool want_more) /* * Log protocol error production. */ -noinline int afs_protocol_error(struct afs_call *call, int error, +noinline int afs_protocol_error(struct afs_call *call, enum afs_eproto_cause cause) { - trace_afs_protocol_error(call, error, cause); - return error; + trace_afs_protocol_error(call, cause); + if (call) + call->unmarshalling_error = true; + return -EBADMSG; } diff --git a/fs/afs/security.c b/fs/afs/security.c index ce9de1e6742b..7c6a63a30394 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -170,8 +170,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (afs_cb_is_broken(cb_break, vnode, - rcu_dereference(vnode->cb_interest))) { + if (afs_cb_is_broken(cb_break, vnode)) { changed = true; break; } @@ -201,7 +200,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest))) + if (afs_cb_is_broken(cb_break, vnode)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -220,8 +219,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, * yet. */ size++; - new = kzalloc(sizeof(struct afs_permits) + - sizeof(struct afs_permit) * size, GFP_NOFS); + new = kzalloc(struct_size(new, permits, size), GFP_NOFS); if (!new) goto out_put; @@ -281,8 +279,7 @@ found: rcu_read_lock(); spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (!afs_cb_is_broken(cb_break, vnode, rcu_dereference(vnode->cb_interest)) && - zap == permits) + if (!afs_cb_is_broken(cb_break, vnode) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else zap = replacement; @@ -398,10 +395,11 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask) +int afs_permission(struct user_namespace *mnt_userns, struct inode *inode, + int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); - afs_access_t uninitialized_var(access); + afs_access_t access; struct key *key; int ret = 0; diff --git a/fs/afs/server.c b/fs/afs/server.c index b7f3cb2130ca..4981baf97835 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -12,19 +12,11 @@ #include "protocol_yfs.h" static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */ -static unsigned afs_server_update_delay = 30; /* Time till VLDB recheck in secs */ static atomic_t afs_server_debug_id; -static void afs_inc_servers_outstanding(struct afs_net *net) -{ - atomic_inc(&net->servers_outstanding); -} - -static void afs_dec_servers_outstanding(struct afs_net *net) -{ - if (atomic_dec_and_test(&net->servers_outstanding)) - wake_up_var(&net->servers_outstanding); -} +static struct afs_server *afs_maybe_use_server(struct afs_server *, + enum afs_server_trace); +static void __afs_put_server(struct afs_net *, struct afs_server *); /* * Find a server by one of its addresses. @@ -41,7 +33,7 @@ struct afs_server *afs_find_server(struct afs_net *net, do { if (server) - afs_put_server(net, server, afs_server_trace_put_find_rsq); + afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_addr_lock, &seq); @@ -79,9 +71,9 @@ struct afs_server *afs_find_server(struct afs_net *net, } server = NULL; + continue; found: - if (server && !atomic_inc_not_zero(&server->usage)) - server = NULL; + server = afs_maybe_use_server(server, afs_server_trace_get_by_addr); } while (need_seqretry(&net->fs_addr_lock, seq)); @@ -92,7 +84,7 @@ struct afs_server *afs_find_server(struct afs_net *net, } /* - * Look up a server by its UUID + * Look up a server by its UUID and mark it active. */ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid) { @@ -108,7 +100,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu * changes. */ if (server) - afs_put_server(net, server, afs_server_trace_put_uuid_rsq); + afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq); server = NULL; read_seqbegin_or_lock(&net->fs_lock, &seq); @@ -123,7 +115,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu } else if (diff > 0) { p = p->rb_right; } else { - afs_get_server(server, afs_server_trace_get_by_uuid); + afs_use_server(server, afs_server_trace_get_by_uuid); break; } @@ -138,13 +130,16 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu } /* - * Install a server record in the namespace tree + * Install a server record in the namespace tree. If there's a clash, we stick + * it into a list anchored on whichever afs_server struct is actually in the + * tree. */ -static struct afs_server *afs_install_server(struct afs_net *net, +static struct afs_server *afs_install_server(struct afs_cell *cell, struct afs_server *candidate) { const struct afs_addr_list *alist; - struct afs_server *server; + struct afs_server *server, *next; + struct afs_net *net = cell->net; struct rb_node **pp, *p; int diff; @@ -160,12 +155,30 @@ static struct afs_server *afs_install_server(struct afs_net *net, _debug("- consider %p", p); server = rb_entry(p, struct afs_server, uuid_rb); diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t)); - if (diff < 0) + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto exists; + } else { + if (server->cell == cell) + goto exists; + + /* We have the same UUID representing servers in + * different cells. Append the new server to the list. + */ + for (;;) { + next = rcu_dereference_protected( + server->uuid_next, + lockdep_is_held(&net->fs_lock.lock)); + if (!next) + break; + server = next; + } + rcu_assign_pointer(server->uuid_next, candidate); + candidate->uuid_prev = server; + server = candidate; + goto added_dup; + } } server = candidate; @@ -173,6 +186,7 @@ static struct afs_server *afs_install_server(struct afs_net *net, rb_insert_color(&server->uuid_rb, &net->fs_servers); hlist_add_head_rcu(&server->proc_link, &net->fs_proc); +added_dup: write_seqlock(&net->fs_addr_lock); alist = rcu_dereference_protected(server->addresses, lockdep_is_held(&net->fs_addr_lock.lock)); @@ -199,13 +213,14 @@ exists: } /* - * allocate a new server record + * Allocate a new server record and mark it active. */ -static struct afs_server *afs_alloc_server(struct afs_net *net, +static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *uuid, struct afs_addr_list *alist) { struct afs_server *server; + struct afs_net *net = cell->net; _enter(""); @@ -213,20 +228,22 @@ static struct afs_server *afs_alloc_server(struct afs_net *net, if (!server) goto enomem; - atomic_set(&server->usage, 1); + refcount_set(&server->ref, 1); + atomic_set(&server->active, 1); server->debug_id = atomic_inc_return(&afs_server_debug_id); RCU_INIT_POINTER(server->addresses, alist); server->addr_version = alist->version; server->uuid = *uuid; - server->update_at = ktime_get_real_seconds() + afs_server_update_delay; rwlock_init(&server->fs_lock); - INIT_HLIST_HEAD(&server->cb_volumes); - rwlock_init(&server->cb_break_lock); + INIT_WORK(&server->initcb_work, afs_server_init_callback_work); init_waitqueue_head(&server->probe_wq); + INIT_LIST_HEAD(&server->probe_link); spin_lock_init(&server->probe_lock); + server->cell = cell; + server->rtt = UINT_MAX; afs_inc_servers_outstanding(net); - trace_afs_server(server, 1, afs_server_trace_alloc); + trace_afs_server(server->debug_id, 1, 1, afs_server_trace_alloc); _leave(" = %p", server); return server; @@ -264,7 +281,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell, * Get or create a fileserver record. */ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, - const uuid_t *uuid) + const uuid_t *uuid, u32 addr_version) { struct afs_addr_list *alist; struct afs_server *server, *candidate; @@ -272,26 +289,34 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key, _enter("%p,%pU", cell->net, uuid); server = afs_find_server_by_uuid(cell->net, uuid); - if (server) + if (server) { + if (server->addr_version != addr_version) + set_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); return server; + } alist = afs_vl_lookup_addrs(cell, key, uuid); if (IS_ERR(alist)) return ERR_CAST(alist); - candidate = afs_alloc_server(cell->net, uuid, alist); + candidate = afs_alloc_server(cell, uuid, alist); if (!candidate) { afs_put_addrlist(alist); return ERR_PTR(-ENOMEM); } - server = afs_install_server(cell->net, candidate); + server = afs_install_server(cell, candidate); if (server != candidate) { afs_put_addrlist(alist); kfree(candidate); + } else { + /* Immediately dispatch an asynchronous probe to each interface + * on the fileserver. This will make sure the repeat-probing + * service is started. + */ + afs_fs_probe_fileserver(cell->net, server, key, true); } - _leave(" = %p{%d}", server, atomic_read(&server->usage)); return server; } @@ -327,9 +352,44 @@ void afs_servers_timer(struct timer_list *timer) struct afs_server *afs_get_server(struct afs_server *server, enum afs_server_trace reason) { - unsigned int u = atomic_inc_return(&server->usage); + unsigned int a; + int r; + + __refcount_inc(&server->ref, &r); + a = atomic_read(&server->active); + trace_afs_server(server->debug_id, r + 1, a, reason); + return server; +} + +/* + * Try to get a reference on a server object. + */ +static struct afs_server *afs_maybe_use_server(struct afs_server *server, + enum afs_server_trace reason) +{ + unsigned int a; + int r; + + if (!__refcount_inc_not_zero(&server->ref, &r)) + return NULL; - trace_afs_server(server, u, reason); + a = atomic_inc_return(&server->active); + trace_afs_server(server->debug_id, r + 1, a, reason); + return server; +} + +/* + * Get an active count on a server object. + */ +struct afs_server *afs_use_server(struct afs_server *server, enum afs_server_trace reason) +{ + unsigned int a; + int r; + + __refcount_inc(&server->ref, &r); + a = atomic_inc_return(&server->active); + + trace_afs_server(server->debug_id, r + 1, a, reason); return server; } @@ -339,37 +399,65 @@ struct afs_server *afs_get_server(struct afs_server *server, void afs_put_server(struct afs_net *net, struct afs_server *server, enum afs_server_trace reason) { - unsigned int usage; + unsigned int a, debug_id = server->debug_id; + bool zero; + int r; if (!server) return; - server->put_time = ktime_get_real_seconds(); - - usage = atomic_dec_return(&server->usage); + a = atomic_inc_return(&server->active); + zero = __refcount_dec_and_test(&server->ref, &r); + trace_afs_server(debug_id, r - 1, a, reason); + if (unlikely(zero)) + __afs_put_server(net, server); +} - trace_afs_server(server, usage, reason); +/* + * Drop an active count on a server object without updating the last-unused + * time. + */ +void afs_unuse_server_notime(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) +{ + if (server) { + unsigned int active = atomic_dec_return(&server->active); - if (likely(usage > 0)) - return; + if (active == 0) + afs_set_server_timer(net, afs_server_gc_delay); + afs_put_server(net, server, reason); + } +} - afs_set_server_timer(net, afs_server_gc_delay); +/* + * Drop an active count on a server object. + */ +void afs_unuse_server(struct afs_net *net, struct afs_server *server, + enum afs_server_trace reason) +{ + if (server) { + server->unuse_time = ktime_get_real_seconds(); + afs_unuse_server_notime(net, server, reason); + } } static void afs_server_rcu(struct rcu_head *rcu) { struct afs_server *server = container_of(rcu, struct afs_server, rcu); - trace_afs_server(server, atomic_read(&server->usage), - afs_server_trace_free); + trace_afs_server(server->debug_id, refcount_read(&server->ref), + atomic_read(&server->active), afs_server_trace_free); afs_put_addrlist(rcu_access_pointer(server->addresses)); kfree(server); } -/* - * destroy a dead server - */ -static void afs_destroy_server(struct afs_net *net, struct afs_server *server) +static void __afs_put_server(struct afs_net *net, struct afs_server *server) +{ + call_rcu(&server->rcu, afs_server_rcu); + afs_dec_servers_outstanding(net); +} + +static void afs_give_up_callbacks(struct afs_net *net, struct afs_server *server) { struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { @@ -378,19 +466,19 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) .error = 0, }; - trace_afs_server(server, atomic_read(&server->usage), - afs_server_trace_give_up_cb); + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); +} +/* + * destroy a dead server + */ +static void afs_destroy_server(struct afs_net *net, struct afs_server *server) +{ if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) - afs_fs_give_up_all_callbacks(net, server, &ac, NULL); - - wait_var_event(&server->probe_outstanding, - atomic_read(&server->probe_outstanding) == 0); + afs_give_up_callbacks(net, server); - trace_afs_server(server, atomic_read(&server->usage), - afs_server_trace_destroy); - call_rcu(&server->rcu, afs_server_rcu); - afs_dec_servers_outstanding(net); + flush_work(&server->initcb_work); + afs_put_server(net, server, afs_server_trace_destroy); } /* @@ -398,32 +486,49 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) */ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list) { - struct afs_server *server; - bool deleted; - int usage; + struct afs_server *server, *next, *prev; + int active; while ((server = gc_list)) { gc_list = server->gc_next; write_seqlock(&net->fs_lock); - usage = 1; - deleted = atomic_try_cmpxchg(&server->usage, &usage, 0); - trace_afs_server(server, usage, afs_server_trace_gc); - if (deleted) { - rb_erase(&server->uuid_rb, &net->fs_servers); - hlist_del_rcu(&server->proc_link); - } - write_sequnlock(&net->fs_lock); - if (deleted) { - write_seqlock(&net->fs_addr_lock); + active = atomic_read(&server->active); + if (active == 0) { + trace_afs_server(server->debug_id, refcount_read(&server->ref), + active, afs_server_trace_gc); + next = rcu_dereference_protected( + server->uuid_next, lockdep_is_held(&net->fs_lock.lock)); + prev = server->uuid_prev; + if (!prev) { + /* The one at the front is in the tree */ + if (!next) { + rb_erase(&server->uuid_rb, &net->fs_servers); + } else { + rb_replace_node_rcu(&server->uuid_rb, + &next->uuid_rb, + &net->fs_servers); + next->uuid_prev = NULL; + } + } else { + /* This server is not at the front */ + rcu_assign_pointer(prev->uuid_next, next); + if (next) + next->uuid_prev = prev; + } + + list_del(&server->probe_link); + hlist_del_rcu(&server->proc_link); if (!hlist_unhashed(&server->addr4_link)) hlist_del_rcu(&server->addr4_link); if (!hlist_unhashed(&server->addr6_link)) hlist_del_rcu(&server->addr6_link); - write_sequnlock(&net->fs_addr_lock); - afs_destroy_server(net, server); } + write_sequnlock(&net->fs_lock); + + if (active == 0) + afs_destroy_server(net, server); } } @@ -452,15 +557,19 @@ void afs_manage_servers(struct work_struct *work) for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) { struct afs_server *server = rb_entry(cursor, struct afs_server, uuid_rb); - int usage = atomic_read(&server->usage); + int active = atomic_read(&server->active); - _debug("manage %pU %u", &server->uuid, usage); + _debug("manage %pU %u", &server->uuid, active); - ASSERTCMP(usage, >=, 1); - ASSERTIFCMP(purging, usage, ==, 1); + if (purging) { + trace_afs_server(server->debug_id, refcount_read(&server->ref), + active, afs_server_trace_purging); + if (active != 0) + pr_notice("Can't purge s=%08x\n", server->debug_id); + } - if (usage == 1) { - time64_t expire_at = server->put_time; + if (active == 0) { + time64_t expire_at = server->unuse_time; if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) && !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags)) @@ -512,11 +621,12 @@ void afs_purge_servers(struct afs_net *net) _enter(""); if (del_timer_sync(&net->fs_timer)) - atomic_dec(&net->servers_outstanding); + afs_dec_servers_outstanding(net); afs_queue_server_manager(net); _debug("wait"); + atomic_dec(&net->servers_outstanding); wait_var_event(&net->servers_outstanding, !atomic_read(&net->servers_outstanding)); _leave(""); @@ -525,26 +635,28 @@ void afs_purge_servers(struct afs_net *net) /* * Get an update for a server's address list. */ -static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +static noinline bool afs_update_server_record(struct afs_operation *op, + struct afs_server *server) { struct afs_addr_list *alist, *discard; _enter(""); - trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update); + trace_afs_server(server->debug_id, refcount_read(&server->ref), + atomic_read(&server->active), + afs_server_trace_update); - alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key, - &server->uuid); + alist = afs_vl_lookup_addrs(op->volume->cell, op->key, &server->uuid); if (IS_ERR(alist)) { if ((PTR_ERR(alist) == -ERESTARTSYS || PTR_ERR(alist) == -EINTR) && - !(fc->flags & AFS_FS_CURSOR_INTR) && + (op->flags & AFS_OPERATION_UNINTR) && server->addresses) { _leave(" = t [intr]"); return true; } - fc->error = PTR_ERR(alist); - _leave(" = f [%d]", fc->error); + op->error = PTR_ERR(alist); + _leave(" = f [%d]", op->error); return false; } @@ -558,7 +670,6 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a write_unlock(&server->fs_lock); } - server->update_at = ktime_get_real_seconds() + afs_server_update_delay; afs_put_addrlist(discard); _leave(" = t"); return true; @@ -567,10 +678,8 @@ static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct a /* * See if a server's address list needs updating. */ -bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server) +bool afs_check_server_record(struct afs_operation *op, struct afs_server *server) { - time64_t now = ktime_get_real_seconds(); - long diff; bool success; int ret, retries = 0; @@ -579,28 +688,29 @@ bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server ASSERT(server); retry: - diff = READ_ONCE(server->update_at) - now; - if (diff > 0) { - _leave(" = t [not now %ld]", diff); - return true; - } + if (test_bit(AFS_SERVER_FL_UPDATING, &server->flags)) + goto wait; + if (test_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags)) + goto update; + _leave(" = t [good]"); + return true; +update: if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) { - success = afs_update_server_record(fc, server); + clear_bit(AFS_SERVER_FL_NEEDS_UPDATE, &server->flags); + success = afs_update_server_record(op, server); clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags); wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING); _leave(" = %d", success); return success; } +wait: ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING, - TASK_INTERRUPTIBLE); + (op->flags & AFS_OPERATION_UNINTR) ? + TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); if (ret == -ERESTARTSYS) { - if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) { - _leave(" = t [intr]"); - return true; - } - fc->error = ret; + op->error = ret; _leave(" = f [intr]"); return false; } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 888d91d195d9..ed9056703505 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -14,11 +14,9 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist) int i; if (slist && refcount_dec_and_test(&slist->usage)) { - for (i = 0; i < slist->nr_servers; i++) { - afs_put_cb_interest(net, slist->servers[i].cb_interest); - afs_put_server(net, slist->servers[i].server, - afs_server_trace_put_slist); - } + for (i = 0; i < slist->nr_servers; i++) + afs_unuse_server(net, slist->servers[i].server, + afs_server_trace_put_slist); kfree(slist); } } @@ -46,12 +44,16 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, refcount_set(&slist->usage, 1); rwlock_init(&slist->lock); + for (i = 0; i < AFS_MAXTYPES; i++) + slist->vids[i] = vldb->vid[i]; + /* Make sure a records exists for each server in the list. */ for (i = 0; i < vldb->nr_servers; i++) { if (!(vldb->fs_mask[i] & type_mask)) continue; - server = afs_lookup_server(cell, key, &vldb->fs_server[i]); + server = afs_lookup_server(cell, key, &vldb->fs_server[i], + vldb->addr_version[i]); if (IS_ERR(server)) { ret = PTR_ERR(server); if (ret == -ENOENT || @@ -123,31 +125,5 @@ changed: } } - /* Keep the old callback interest records where possible so that we - * maintain callback interception. - */ - i = 0; - j = 0; - while (i < old->nr_servers && j < new->nr_servers) { - if (new->servers[j].server == old->servers[i].server) { - struct afs_cb_interest *cbi = old->servers[i].cb_interest; - if (cbi) { - new->servers[j].cb_interest = cbi; - refcount_inc(&cbi->usage); - } - i++; - j++; - continue; - } - - if (new->servers[j].server < old->servers[i].server) { - j++; - continue; - } - - i++; - continue; - } - return true; } diff --git a/fs/afs/super.c b/fs/afs/super.c index dda7a9a66848..95d713074dc8 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -55,6 +55,7 @@ int afs_net_id; static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, + .write_inode = afs_write_inode, .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, .free_inode = afs_free_inode, @@ -230,6 +231,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) _enter(",%s", name); + if (fc->source) + return invalf(fc, "kAFS: Multiple sources not supported"); + if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; @@ -294,7 +298,8 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } - afs_put_cell(ctx->net, ctx->cell); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse); + afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } @@ -352,7 +357,9 @@ static int afs_validate_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_volume *volume; + struct afs_cell *cell; struct key *key; + int ret; if (!ctx->dyn_root) { if (ctx->no_cell) { @@ -365,6 +372,7 @@ static int afs_validate_fc(struct fs_context *fc) return -EDESTADDRREQ; } + reget_key: /* We try to do the mount securely. */ key = afs_request_key(ctx->cell); if (IS_ERR(key)) @@ -373,10 +381,27 @@ static int afs_validate_fc(struct fs_context *fc) ctx->key = key; if (ctx->volume) { - afs_put_volume(ctx->cell, ctx->volume); + afs_put_volume(ctx->net, ctx->volume, + afs_volume_trace_put_validate_fc); ctx->volume = NULL; } + if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) { + ret = afs_cell_detect_alias(ctx->cell, key); + if (ret < 0) + return ret; + if (ret == 1) { + _debug("switch to alias"); + key_put(ctx->key); + ctx->key = NULL; + cell = afs_use_cell(ctx->cell->alias_of, + afs_cell_trace_use_fc_alias); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); + ctx->cell = cell; + goto reget_key; + } + } + volume = afs_create_volume(ctx); if (IS_ERR(volume)) return PTR_ERR(volume); @@ -421,7 +446,6 @@ static int afs_set_super(struct super_block *sb, struct fs_context *fc) static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); - struct afs_iget_data iget_data; struct inode *inode = NULL; int ret; @@ -438,7 +462,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) ret = super_setup_bdi(sb); if (ret) return ret; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* allocate the root inode and dentry */ if (as->dyn_root) { @@ -446,13 +469,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); - iget_data.fid.vid = as->volume->vid; - iget_data.fid.vnode = 1; - iget_data.fid.vnode_hi = 0; - iget_data.fid.unique = 1; - iget_data.cb_v_break = as->volume->cb_v_break; - iget_data.cb_s_break = 0; - inode = afs_iget(sb, ctx->key, &iget_data, NULL, NULL, NULL); + inode = afs_root_iget(sb, ctx->key); } if (IS_ERR(inode)) @@ -473,6 +490,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) goto error; } else { sb->s_d_op = &afs_fs_dentry_operations; + rcu_assign_pointer(as->volume->sb, sb); } _leave(" = 0"); @@ -495,8 +513,9 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) if (ctx->dyn_root) { as->dyn_root = true; } else { - as->cell = afs_get_cell(ctx->cell); - as->volume = __afs_get_volume(ctx->volume); + as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); + as->volume = afs_get_volume(ctx->volume, + afs_volume_trace_get_alloc_sbi); } } return as; @@ -505,8 +524,9 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { - afs_put_volume(as->cell, as->volume); - afs_put_cell(afs_net(as->net_ns), as->cell); + struct afs_net *net = afs_net(as->net_ns); + afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi); + afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } @@ -515,7 +535,6 @@ static void afs_destroy_sbi(struct afs_super_info *as) static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); - struct afs_net *net = afs_net(as->net_ns); if (as->dyn_root) afs_dynroot_depopulate(sb); @@ -524,7 +543,7 @@ static void afs_kill_super(struct super_block *sb) * deactivating the superblock. */ if (as->volume) - afs_clear_callback_interests(net, as->volume->servers); + rcu_assign_pointer(as->volume->sb, NULL); kill_anon_super(sb); if (as->volume) afs_deactivate_volume(as->volume); @@ -592,8 +611,8 @@ static void afs_free_fc(struct fs_context *fc) struct afs_fs_context *ctx = fc->fs_private; afs_destroy_sbi(fc->s_fs_info); - afs_put_volume(ctx->cell, ctx->volume); - afs_put_cell(ctx->net, ctx->cell); + afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc); + afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } @@ -620,9 +639,7 @@ static int afs_init_fs_context(struct fs_context *fc) ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. */ - rcu_read_lock(); - cell = afs_lookup_cell_rcu(ctx->net, NULL, 0); - rcu_read_unlock(); + cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; @@ -642,7 +659,7 @@ static void afs_i_init_once(void *_vnode) struct afs_vnode *vnode = _vnode; memset(vnode, 0, sizeof(*vnode)); - inode_init_once(&vnode->vfs_inode); + inode_init_once(&vnode->netfs.inode); mutex_init(&vnode->io_lock); init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); @@ -651,6 +668,7 @@ static void afs_i_init_once(void *_vnode) INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); + INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } @@ -661,7 +679,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) { struct afs_vnode *vnode; - vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL); + vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; @@ -670,22 +688,20 @@ static struct inode *afs_alloc_inode(struct super_block *sb) /* Reset anything that shouldn't leak from one inode to the next. */ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); + afs_vnode_set_cache(vnode, NULL); vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; - RCU_INIT_POINTER(vnode->cb_interest, NULL); -#ifdef CONFIG_AFS_FSCACHE - vnode->cache = NULL; -#endif vnode->flags = 1 << AFS_VNODE_UNSET; vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); + INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work); - _leave(" = %p", &vnode->vfs_inode); - return &vnode->vfs_inode; + _leave(" = %p", &vnode->netfs.inode); + return &vnode->netfs.inode; } static void afs_free_inode(struct inode *inode) @@ -704,22 +720,38 @@ static void afs_destroy_inode(struct inode *inode) _debug("DESTROY INODE %p", inode); - ASSERTCMP(rcu_access_pointer(vnode->cb_interest), ==, NULL); - atomic_dec(&afs_count_active_inodes); } +static void afs_get_volume_status_success(struct afs_operation *op) +{ + struct afs_volume_status *vs = &op->volstatus.vs; + struct kstatfs *buf = op->volstatus.buf; + + if (vs->max_quota == 0) + buf->f_blocks = vs->part_max_blocks; + else + buf->f_blocks = vs->max_quota; + + if (buf->f_blocks > vs->blocks_in_use) + buf->f_bavail = buf->f_bfree = + buf->f_blocks - vs->blocks_in_use; +} + +static const struct afs_operation_ops afs_get_volume_status_operation = { + .issue_afs_rpc = afs_fs_get_volume_status, + .issue_yfs_rpc = yfs_fs_get_volume_status, + .success = afs_get_volume_status_success, +}; + /* * return information about an AFS volume */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct afs_super_info *as = AFS_FS_S(dentry->d_sb); - struct afs_fs_cursor fc; - struct afs_volume_status vs; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); - struct key *key; - int ret; buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = AFS_BLOCK_SIZE; @@ -732,31 +764,13 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) - return PTR_ERR(key); + op = afs_alloc_operation(NULL, as->volume); + if (IS_ERR(op)) + return PTR_ERR(op); - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_get_volume_status(&fc, &vs); - } - - afs_check_for_remote_deletion(&fc, fc.vnode); - ret = afs_end_vnode_operation(&fc); - } - - key_put(key); - - if (ret == 0) { - if (vs.max_quota == 0) - buf->f_blocks = vs.part_max_blocks; - else - buf->f_blocks = vs.max_quota; - buf->f_bavail = buf->f_bfree = buf->f_blocks - vs.blocks_in_use; - } - - return ret; + afs_op_set_vnode(op, 0, vnode); + op->nr_files = 1; + op->volstatus.buf = buf; + op->ops = &afs_get_volume_status_operation; + return afs_do_sync_operation(op); } diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c new file mode 100644 index 000000000000..f04a80e4f5c3 --- /dev/null +++ b/fs/afs/vl_alias.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AFS cell alias detection + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/namei.h> +#include <keys/rxrpc-type.h> +#include "internal.h" + +/* + * Sample a volume. + */ +static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key, + const char *name, unsigned int namelen) +{ + struct afs_volume *volume; + struct afs_fs_context fc = { + .type = 0, /* Explicitly leave it to the VLDB */ + .volnamesz = namelen, + .volname = name, + .net = cell->net, + .cell = cell, + .key = key, /* This might need to be something */ + }; + + volume = afs_create_volume(&fc); + _leave(" = %p", volume); + return volume; +} + +/* + * Compare two addresses. + */ +static int afs_compare_addrs(const struct sockaddr_rxrpc *srx_a, + const struct sockaddr_rxrpc *srx_b) +{ + short port_a, port_b; + int addr_a, addr_b, diff; + + diff = (short)srx_a->transport_type - (short)srx_b->transport_type; + if (diff) + goto out; + + switch (srx_a->transport_type) { + case AF_INET: { + const struct sockaddr_in *a = &srx_a->transport.sin; + const struct sockaddr_in *b = &srx_b->transport.sin; + addr_a = ntohl(a->sin_addr.s_addr); + addr_b = ntohl(b->sin_addr.s_addr); + diff = addr_a - addr_b; + if (diff == 0) { + port_a = ntohs(a->sin_port); + port_b = ntohs(b->sin_port); + diff = port_a - port_b; + } + break; + } + + case AF_INET6: { + const struct sockaddr_in6 *a = &srx_a->transport.sin6; + const struct sockaddr_in6 *b = &srx_b->transport.sin6; + diff = memcmp(&a->sin6_addr, &b->sin6_addr, 16); + if (diff == 0) { + port_a = ntohs(a->sin6_port); + port_b = ntohs(b->sin6_port); + diff = port_a - port_b; + } + break; + } + + default: + WARN_ON(1); + diff = 1; + } + +out: + return diff; +} + +/* + * Compare the address lists of a pair of fileservers. + */ +static int afs_compare_fs_alists(const struct afs_server *server_a, + const struct afs_server *server_b) +{ + const struct afs_addr_list *la, *lb; + int a = 0, b = 0, addr_matches = 0; + + la = rcu_dereference(server_a->addresses); + lb = rcu_dereference(server_b->addresses); + + while (a < la->nr_addrs && b < lb->nr_addrs) { + const struct sockaddr_rxrpc *srx_a = &la->addrs[a]; + const struct sockaddr_rxrpc *srx_b = &lb->addrs[b]; + int diff = afs_compare_addrs(srx_a, srx_b); + + if (diff < 0) { + a++; + } else if (diff > 0) { + b++; + } else { + addr_matches++; + a++; + b++; + } + } + + return addr_matches; +} + +/* + * Compare the fileserver lists of two volumes. The server lists are sorted in + * order of ascending UUID. + */ +static int afs_compare_volume_slists(const struct afs_volume *vol_a, + const struct afs_volume *vol_b) +{ + const struct afs_server_list *la, *lb; + int i, a = 0, b = 0, uuid_matches = 0, addr_matches = 0; + + la = rcu_dereference(vol_a->servers); + lb = rcu_dereference(vol_b->servers); + + for (i = 0; i < AFS_MAXTYPES; i++) + if (la->vids[i] != lb->vids[i]) + return 0; + + while (a < la->nr_servers && b < lb->nr_servers) { + const struct afs_server *server_a = la->servers[a].server; + const struct afs_server *server_b = lb->servers[b].server; + int diff = memcmp(&server_a->uuid, &server_b->uuid, sizeof(uuid_t)); + + if (diff < 0) { + a++; + } else if (diff > 0) { + b++; + } else { + uuid_matches++; + addr_matches += afs_compare_fs_alists(server_a, server_b); + a++; + b++; + } + } + + _leave(" = %d [um %d]", addr_matches, uuid_matches); + return addr_matches; +} + +/* + * Compare root.cell volumes. + */ +static int afs_compare_cell_roots(struct afs_cell *cell) +{ + struct afs_cell *p; + + _enter(""); + + rcu_read_lock(); + + hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) { + if (p == cell || p->alias_of) + continue; + if (!p->root_volume) + continue; /* Ignore cells that don't have a root.cell volume. */ + + if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0) + goto is_alias; + } + + rcu_read_unlock(); + _leave(" = 0"); + return 0; + +is_alias: + rcu_read_unlock(); + cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias); + return 1; +} + +/* + * Query the new cell for a volume from a cell we're already using. + */ +static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key, + struct afs_cell *p) +{ + struct afs_volume *volume, *pvol = NULL; + int ret; + + /* Arbitrarily pick a volume from the list. */ + read_seqlock_excl(&p->volume_lock); + if (!RB_EMPTY_ROOT(&p->volumes)) + pvol = afs_get_volume(rb_entry(p->volumes.rb_node, + struct afs_volume, cell_node), + afs_volume_trace_get_query_alias); + read_sequnlock_excl(&p->volume_lock); + if (!pvol) + return 0; + + _enter("%s:%s", cell->name, pvol->name); + + /* And see if it's in the new cell. */ + volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len); + if (IS_ERR(volume)) { + afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias); + if (PTR_ERR(volume) != -ENOMEDIUM) + return PTR_ERR(volume); + /* That volume is not in the new cell, so not an alias */ + return 0; + } + + /* The new cell has a like-named volume also - compare volume ID, + * server and address lists. + */ + ret = 0; + if (pvol->vid == volume->vid) { + rcu_read_lock(); + if (afs_compare_volume_slists(volume, pvol)) + ret = 1; + rcu_read_unlock(); + } + + afs_put_volume(cell->net, volume, afs_volume_trace_put_query_alias); + afs_put_volume(cell->net, pvol, afs_volume_trace_put_query_alias); + return ret; +} + +/* + * Query the new cell for volumes we know exist in cells we're already using. + */ +static int afs_query_for_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *p; + + _enter("%s", cell->name); + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) + return -ERESTARTSYS; + + hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) { + if (p == cell || p->alias_of) + continue; + if (RB_EMPTY_ROOT(&p->volumes)) + continue; + if (p->root_volume) + continue; /* Ignore cells that have a root.cell volume. */ + afs_use_cell(p, afs_cell_trace_use_check_alias); + mutex_unlock(&cell->net->proc_cells_lock); + + if (afs_query_for_alias_one(cell, key, p) != 0) + goto is_alias; + + if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + return -ERESTARTSYS; + } + + afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias); + } + + mutex_unlock(&cell->net->proc_cells_lock); + _leave(" = 0"); + return 0; + +is_alias: + cell->alias_of = p; /* Transfer our ref */ + return 1; +} + +/* + * Look up a VLDB record for a volume. + */ +static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_vl_cursor vc; + char *cell_name = ERR_PTR(-EDESTADDRREQ); + bool skipped = false, not_skipped = false; + int ret; + + if (!afs_begin_vlserver_operation(&vc, cell, key)) + return ERR_PTR(-ERESTARTSYS); + + while (afs_select_vlserver(&vc)) { + if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) { + vc.ac.error = -EOPNOTSUPP; + skipped = true; + continue; + } + not_skipped = true; + cell_name = afs_yfsvl_get_cell_name(&vc); + } + + ret = afs_end_vlserver_operation(&vc); + if (skipped && !not_skipped) + ret = -EOPNOTSUPP; + return ret < 0 ? ERR_PTR(ret) : cell_name; +} + +static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) +{ + struct afs_cell *master; + char *cell_name; + + cell_name = afs_vl_get_cell_name(cell, key); + if (IS_ERR(cell_name)) + return PTR_ERR(cell_name); + + if (strcmp(cell_name, cell->name) == 0) { + kfree(cell_name); + return 0; + } + + master = afs_lookup_cell(cell->net, cell_name, strlen(cell_name), + NULL, false); + kfree(cell_name); + if (IS_ERR(master)) + return PTR_ERR(master); + + cell->alias_of = master; /* Transfer our ref */ + return 1; +} + +static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_volume *root_volume; + int ret; + + _enter("%s", cell->name); + + ret = yfs_check_canonical_cell_name(cell, key); + if (ret != -EOPNOTSUPP) + return ret; + + /* Try and get the root.cell volume for comparison with other cells */ + root_volume = afs_sample_volume(cell, key, "root.cell", 9); + if (!IS_ERR(root_volume)) { + cell->root_volume = root_volume; + return afs_compare_cell_roots(cell); + } + + if (PTR_ERR(root_volume) != -ENOMEDIUM) + return PTR_ERR(root_volume); + + /* Okay, this cell doesn't have an root.cell volume. We need to + * locate some other random volume and use that to check. + */ + return afs_query_for_alias(cell, key); +} + +/* + * Check to see if a new cell is an alias of a cell we already have. At this + * point we have the cell's volume server list. + * + * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error + * if we had problems gathering the data required. In the case the we did + * detect an alias, cell->alias_of is set to point to the assumed master. + */ +int afs_cell_detect_alias(struct afs_cell *cell, struct key *key) +{ + struct afs_net *net = cell->net; + int ret; + + if (mutex_lock_interruptible(&net->cells_alias_lock) < 0) + return -ERESTARTSYS; + + if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) { + ret = afs_do_cell_detect_alias(cell, key); + if (ret >= 0) + clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags); + } else { + ret = cell->alias_of ? 1 : 0; + } + + mutex_unlock(&net->cells_alias_lock); + + if (ret == 1) + pr_notice("kAFS: Cell %s is an alias of %s\n", + cell->name, cell->alias_of->name); + return ret; +} diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c index 8fea54eba0c2..acc48216136a 100644 --- a/fs/afs/vl_list.c +++ b/fs/afs/vl_list.c @@ -17,10 +17,11 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len, vlserver = kzalloc(struct_size(vlserver, name, name_len + 1), GFP_KERNEL); if (vlserver) { - atomic_set(&vlserver->usage, 1); + refcount_set(&vlserver->ref, 1); rwlock_init(&vlserver->lock); init_waitqueue_head(&vlserver->probe_wq); spin_lock_init(&vlserver->probe_lock); + vlserver->rtt = UINT_MAX; vlserver->name_len = name_len; vlserver->port = port; memcpy(vlserver->name, name, name_len); @@ -38,13 +39,9 @@ static void afs_vlserver_rcu(struct rcu_head *rcu) void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver) { - if (vlserver) { - unsigned int u = atomic_dec_return(&vlserver->usage); - //_debug("VL PUT %p{%u}", vlserver, u); - - if (u == 0) - call_rcu(&vlserver->rcu, afs_vlserver_rcu); - } + if (vlserver && + refcount_dec_and_test(&vlserver->ref)) + call_rcu(&vlserver->rcu, afs_vlserver_rcu); } struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) @@ -53,7 +50,7 @@ struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL); if (vllist) { - atomic_set(&vllist->usage, 1); + refcount_set(&vllist->ref, 1); rwlock_init(&vllist->lock); } @@ -63,10 +60,7 @@ struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers) void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist) { if (vllist) { - unsigned int u = atomic_dec_return(&vllist->usage); - - //_debug("VLLS PUT %p{%u}", vllist, u); - if (u == 0) { + if (refcount_dec_and_test(&vllist->ref)) { int i; for (i = 0; i < vllist->nr_servers; i++) { diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index 858498cc1b05..d1c7068b4346 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -11,15 +11,33 @@ #include "internal.h" #include "protocol_yfs.h" -static bool afs_vl_probe_done(struct afs_vlserver *server) + +/* + * Handle the completion of a set of probes. + */ +static void afs_finished_vl_probe(struct afs_vlserver *server) { - if (!atomic_dec_and_test(&server->probe_outstanding)) - return false; + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { + server->rtt = UINT_MAX; + clear_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + } - wake_up_var(&server->probe_outstanding); clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags); wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING); - return true; +} + +/* + * Handle the completion of a probe RPC call. + */ +static void afs_done_one_vl_probe(struct afs_vlserver *server, bool wake_up) +{ + if (atomic_dec_and_test(&server->probe_outstanding)) { + afs_finished_vl_probe(server); + wake_up = true; + } + + if (wake_up) + wake_up_all(&server->probe_wq); } /* @@ -31,10 +49,9 @@ void afs_vlserver_probe_result(struct afs_call *call) struct afs_addr_list *alist = call->alist; struct afs_vlserver *server = call->vlserver; unsigned int server_index = call->server_index; + unsigned int rtt_us = 0; unsigned int index = call->addr_ix; - unsigned int rtt = UINT_MAX; bool have_result = false; - u64 _rtt; int ret = call->error; _enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code); @@ -46,15 +63,20 @@ void afs_vlserver_probe_result(struct afs_call *call) server->probe.error = 0; goto responded; case -ECONNABORTED: - if (!server->probe.responded) { + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED)) { server->probe.abort_code = call->abort_code; server->probe.error = ret; } goto responded; case -ENOMEM: case -ENONET: - server->probe.local_failure = true; - afs_io_error(call, afs_io_error_vl_probe_fail); + case -EKEYEXPIRED: + case -EKEYREVOKED: + case -EKEYREJECTED: + server->probe.flags |= AFS_VLSERVER_PROBE_LOCAL_FAILURE; + if (server->probe.error == 0) + server->probe.error = ret; + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); goto out; case -ECONNRESET: /* Responded, but call expired. */ case -ERFKILL: @@ -68,12 +90,12 @@ void afs_vlserver_probe_result(struct afs_call *call) default: clear_bit(index, &alist->responded); set_bit(index, &alist->failed); - if (!server->probe.responded && + if (!(server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) && (server->probe.error == 0 || server->probe.error == -ETIMEDOUT || server->probe.error == -ETIME)) server->probe.error = ret; - afs_io_error(call, afs_io_error_vl_probe_fail); + trace_afs_io_error(call->debug_id, ret, afs_io_error_vl_probe_fail); goto out; } @@ -82,46 +104,36 @@ responded: clear_bit(index, &alist->failed); if (call->service_id == YFS_VL_SERVICE) { - server->probe.is_yfs = true; + server->probe.flags |= AFS_VLSERVER_PROBE_IS_YFS; set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } else { - server->probe.not_yfs = true; - if (!server->probe.is_yfs) { + server->probe.flags |= AFS_VLSERVER_PROBE_NOT_YFS; + if (!(server->probe.flags & AFS_VLSERVER_PROBE_IS_YFS)) { clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags); alist->addrs[index].srx_service = call->service_id; } } - /* Get the RTT and scale it to fit into a 32-bit value that represents - * over a minute of time so that we can access it with one instruction - * on a 32-bit system. - */ - _rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - _rtt /= 64; - rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt; - if (rtt < server->probe.rtt) { - server->probe.rtt = rtt; + if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && + rtt_us < server->probe.rtt) { + server->probe.rtt = rtt_us; + server->rtt = rtt_us; alist->preferred = index; - have_result = true; } smp_wmb(); /* Set rtt before responded. */ - server->probe.responded = true; + server->probe.flags |= AFS_VLSERVER_PROBE_RESPONDED; set_bit(AFS_VLSERVER_FL_PROBED, &server->flags); + set_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags); + have_result = true; out: spin_unlock(&server->probe_lock); _debug("probe [%u][%u] %pISpc rtt=%u ret=%d", - server_index, index, &alist->addrs[index].transport, - (unsigned int)rtt, ret); + server_index, index, &alist->addrs[index].transport, rtt_us, ret); - have_result |= afs_vl_probe_done(server); - if (have_result) { - server->probe.have_result = true; - wake_up_var(&server->probe.have_result); - wake_up_all(&server->probe_wq); - } + afs_done_one_vl_probe(server, have_result); } /* @@ -159,11 +171,10 @@ static bool afs_do_probe_vlserver(struct afs_net *net, in_progress = true; } else { afs_prioritise_error(_e, PTR_ERR(call), ac.abort_code); + afs_done_one_vl_probe(server, false); } } - if (!in_progress) - afs_vl_probe_done(server); return in_progress; } @@ -201,7 +212,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, { struct wait_queue_entry *waits; struct afs_vlserver *server; - unsigned int rtt = UINT_MAX; + unsigned int rtt = UINT_MAX, rtt_s; bool have_responders = false; int pref = -1, i; @@ -213,7 +224,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, server = vllist->servers[i].server; if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) __clear_bit(i, &untried); - if (server->probe.responded) + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) have_responders = true; } } @@ -239,7 +250,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, for (i = 0; i < vllist->nr_servers; i++) { if (test_bit(i, &untried)) { server = vllist->servers[i].server; - if (server->probe.responded) + if (server->probe.flags & AFS_VLSERVER_PROBE_RESPONDED) goto stop; if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags)) still_probing = true; @@ -257,10 +268,11 @@ stop: for (i = 0; i < vllist->nr_servers; i++) { if (test_bit(i, &untried)) { server = vllist->servers[i].server; - if (server->probe.responded && - server->probe.rtt < rtt) { + rtt_s = READ_ONCE(server->rtt); + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &server->flags) && + rtt_s < rtt) { pref = i; - rtt = server->probe.rtt; + rtt = rtt_s; } remove_wait_queue(&server->probe_wq, &waits[i]); diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c index 9a5ce9687779..488e58490b16 100644 --- a/fs/afs/vl_rotate.c +++ b/fs/afs/vl_rotate.c @@ -45,7 +45,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc) cell->dns_expiry <= ktime_get_real_seconds()) { dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count); set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); - queue_work(afs_wq, &cell->manager); + afs_queue_cell(cell, afs_cell_trace_get_queue_dns); if (cell->dns_source == DNS_RECORD_UNAVAILABLE) { if (wait_var_event_interruptible( @@ -151,6 +151,10 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc) vc->error = error; vc->flags |= AFS_VL_CURSOR_RETRY; goto next_server; + + case -EOPNOTSUPP: + _debug("notsupp"); + goto next_server; } restart_from_beginning: @@ -188,7 +192,8 @@ pick_server: for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; - if (!test_bit(i, &vc->untried) || !s->probe.responded) + if (!test_bit(i, &vc->untried) || + !test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) continue; if (s->probe.rtt < rtt) { vc->index = i; @@ -258,10 +263,14 @@ no_more_servers: for (i = 0; i < vc->server_list->nr_servers; i++) { struct afs_vlserver *s = vc->server_list->servers[i].server; + if (test_bit(AFS_VLSERVER_FL_RESPONDING, &s->flags)) + e.responded = true; afs_prioritise_error(&e, READ_ONCE(s->probe.error), s->probe.abort_code); } + error = e.error; + failed_set_error: vc->error = error; failed: @@ -302,8 +311,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc) pr_notice("VC: - nr=%u/%u/%u pf=%u\n", a->nr_ipv4, a->nr_addrs, a->max_addrs, a->preferred); - pr_notice("VC: - pr=%lx R=%lx F=%lx\n", - a->probed, a->responded, a->failed); + pr_notice("VC: - R=%lx F=%lx\n", + a->responded, a->failed); if (a == vc->ac.alist) pr_notice("VC: - current\n"); } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 516e9a3bb5b4..00fca3c66ba6 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -82,6 +82,7 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call) for (j = 0; j < 6; j++) uuid->node[j] = (u8)ntohl(xdr->node[j]); + entry->addr_version[n] = ntohl(uvldb->serverUnique[i]); entry->nr_servers++; } @@ -195,7 +196,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) /* Extract the returned uuid, uniquifier, nentries and * blkaddrs size */ - /* Fall through */ + fallthrough; case 1: ret = afs_extract_data(call, true); if (ret < 0) @@ -220,7 +221,7 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call) count = min(call->count, 4U); afs_extract_to_buf(call, count * sizeof(__be32)); - /* Fall through - and extract entries */ + fallthrough; /* and extract entries */ case 2: ret = afs_extract_data(call, call->count > 4); if (ret < 0) @@ -323,7 +324,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through - and extract the capabilities word count */ + fallthrough; /* and extract the capabilities word count */ case 1: ret = afs_extract_data(call, true); if (ret < 0) @@ -336,7 +337,7 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call) call->unmarshall++; afs_extract_discard(call, count * sizeof(__be32)); - /* Fall through - and extract capabilities words */ + fallthrough; /* and extract capabilities words */ case 2: ret = afs_extract_data(call, false); if (ret < 0) @@ -435,7 +436,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) /* Extract the returned uuid, uniquifier, fsEndpoints count and * either the first fsEndpoint type or the volEndpoints * count if there are no fsEndpoints. */ - /* Fall through */ + fallthrough; case 1: ret = afs_extract_data(call, true); if (ret < 0) @@ -447,8 +448,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) call->count2 = ntohl(*bp); /* Type or next count */ if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_fsendpt_num); + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_num); alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT); if (!alist) @@ -468,15 +468,14 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_fsendpt_type); + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type); } size += sizeof(__be32); afs_extract_to_buf(call, size); call->unmarshall = 2; - /* Fall through - and extract fsEndpoints[] entries */ + fallthrough; /* and extract fsEndpoints[] entries */ case 2: ret = afs_extract_data(call, true); if (ret < 0) @@ -487,21 +486,20 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_fsendpt4_len); + return afs_protocol_error( + call, afs_eproto_yvl_fsendpt4_len); afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2])); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_fsendpt6_len); + return afs_protocol_error( + call, afs_eproto_yvl_fsendpt6_len); afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5])); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_fsendpt_type); + return afs_protocol_error(call, afs_eproto_yvl_fsendpt_type); } /* Got either the type of the next entry or the count of @@ -519,8 +517,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) if (!call->count) goto end; if (call->count > YFS_MAXENDPOINTS) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_vlendpt_type); + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); afs_extract_to_buf(call, 1 * sizeof(__be32)); call->unmarshall = 3; @@ -529,7 +526,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) * extract the type of the next endpoint when we extract the * data of the current one, but this is the first... */ - /* Fall through */ + fallthrough; case 3: ret = afs_extract_data(call, true); if (ret < 0) @@ -547,8 +544,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) size = sizeof(__be32) * (1 + 4 + 1); break; default: - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_vlendpt_type); + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); } if (call->count > 1) @@ -556,7 +552,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) afs_extract_to_buf(call, size); call->unmarshall = 4; - /* Fall through - and extract volEndpoints[] entries */ + fallthrough; /* and extract volEndpoints[] entries */ case 4: ret = afs_extract_data(call, true); if (ret < 0) @@ -566,19 +562,18 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) switch (call->count2) { case YFS_ENDPOINT_IPV4: if (ntohl(bp[0]) != sizeof(__be32) * 2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_vlendpt4_len); + return afs_protocol_error( + call, afs_eproto_yvl_vlendpt4_len); bp += 3; break; case YFS_ENDPOINT_IPV6: if (ntohl(bp[0]) != sizeof(__be32) * 5) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_vlendpt6_len); + return afs_protocol_error( + call, afs_eproto_yvl_vlendpt6_len); bp += 6; break; default: - return afs_protocol_error(call, -EBADMSG, - afs_eproto_yvl_vlendpt_type); + return afs_protocol_error(call, afs_eproto_yvl_vlendpt_type); } /* Got either the type of the next entry or the count of @@ -592,12 +587,13 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call) afs_extract_discard(call, 0); call->unmarshall = 5; - /* Fall through - Done */ + fallthrough; /* Done */ case 5: ret = afs_extract_data(call, false); if (ret < 0) return ret; call->unmarshall = 6; + fallthrough; case 6: break; @@ -650,3 +646,114 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc, afs_make_call(&vc->ac, call, GFP_KERNEL); return (struct afs_addr_list *)afs_wait_for_call_to_complete(call, &vc->ac); } + +/* + * Deliver reply data to a YFSVL.GetCellName operation. + */ +static int afs_deliver_yfsvl_get_cell_name(struct afs_call *call) +{ + char *cell_name; + u32 namesz, paddedsz; + int ret; + + _enter("{%u,%zu/%u}", + call->unmarshall, iov_iter_count(call->iter), call->count); + + switch (call->unmarshall) { + case 0: + afs_extract_to_tmp(call); + call->unmarshall++; + + fallthrough; /* and extract the cell name length */ + case 1: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + namesz = ntohl(call->tmp); + if (namesz > AFS_MAXCELLNAME) + return afs_protocol_error(call, afs_eproto_cellname_len); + paddedsz = (namesz + 3) & ~3; + call->count = namesz; + call->count2 = paddedsz - namesz; + + cell_name = kmalloc(namesz + 1, GFP_KERNEL); + if (!cell_name) + return -ENOMEM; + cell_name[namesz] = 0; + call->ret_str = cell_name; + + afs_extract_begin(call, cell_name, namesz); + call->unmarshall++; + + fallthrough; /* and extract cell name */ + case 2: + ret = afs_extract_data(call, true); + if (ret < 0) + return ret; + + afs_extract_discard(call, call->count2); + call->unmarshall++; + + fallthrough; /* and extract padding */ + case 3: + ret = afs_extract_data(call, false); + if (ret < 0) + return ret; + + call->unmarshall++; + break; + } + + _leave(" = 0 [done]"); + return 0; +} + +static void afs_destroy_yfsvl_get_cell_name(struct afs_call *call) +{ + kfree(call->ret_str); + afs_flat_call_destructor(call); +} + +/* + * VL.GetCapabilities operation type + */ +static const struct afs_call_type afs_YFSVLGetCellName = { + .name = "YFSVL.GetCellName", + .op = afs_YFSVL_GetCellName, + .deliver = afs_deliver_yfsvl_get_cell_name, + .destructor = afs_destroy_yfsvl_get_cell_name, +}; + +/* + * Probe a volume server for the capabilities that it supports. This can + * return up to 196 words. + * + * We use this to probe for service upgrade to determine what the server at the + * other end supports. + */ +char *afs_yfsvl_get_cell_name(struct afs_vl_cursor *vc) +{ + struct afs_call *call; + struct afs_net *net = vc->cell->net; + __be32 *bp; + + _enter(""); + + call = afs_alloc_flat_call(net, &afs_YFSVLGetCellName, 1 * 4, 0); + if (!call) + return ERR_PTR(-ENOMEM); + + call->key = vc->key; + call->ret_str = NULL; + call->max_lifespan = AFS_VL_MAX_LIFESPAN; + + /* marshall the parameters */ + bp = call->request; + *bp++ = htonl(YVLGETCELLNAME); + + /* Can't take a ref on server */ + trace_afs_make_vl_call(call); + afs_make_call(&vc->ac, call, GFP_KERNEL); + return (char *)afs_wait_for_call_to_complete(call, &vc->ac); +} diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 92ca5e27573b..f4937029dcd7 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -9,8 +9,57 @@ #include <linux/slab.h> #include "internal.h" -unsigned __read_mostly afs_volume_gc_delay = 10; -unsigned __read_mostly afs_volume_record_life = 60 * 60; +static unsigned __read_mostly afs_volume_record_life = 60 * 60; + +/* + * Insert a volume into a cell. If there's an existing volume record, that is + * returned instead with a ref held. + */ +static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, + struct afs_volume *volume) +{ + struct afs_volume *p; + struct rb_node *parent = NULL, **pp; + + write_seqlock(&cell->volume_lock); + + pp = &cell->volumes.rb_node; + while (*pp) { + parent = *pp; + p = rb_entry(parent, struct afs_volume, cell_node); + if (p->vid < volume->vid) { + pp = &(*pp)->rb_left; + } else if (p->vid > volume->vid) { + pp = &(*pp)->rb_right; + } else { + volume = afs_get_volume(p, afs_volume_trace_get_cell_insert); + goto found; + } + } + + rb_link_node_rcu(&volume->cell_node, parent, pp); + rb_insert_color(&volume->cell_node, &cell->volumes); + hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); + +found: + write_sequnlock(&cell->volume_lock); + return volume; + +} + +static void afs_remove_volume_from_cell(struct afs_volume *volume) +{ + struct afs_cell *cell = volume->cell; + + if (!hlist_unhashed(&volume->proc_link)) { + trace_afs_volume(volume->vid, refcount_read(&cell->ref), + afs_volume_trace_remove); + write_seqlock(&cell->volume_lock); + hlist_del_rcu(&volume->proc_link); + rb_erase(&volume->cell_node, &cell->volumes); + write_sequnlock(&cell->volume_lock); + } +} /* * Allocate a volume record and load it up from a vldb record. @@ -33,13 +82,13 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, volume->vid = vldb->vid[params->type]; volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; - volume->cell = afs_get_cell(params->cell); + volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); volume->type = params->type; volume->type_force = params->force; volume->name_len = vldb->name_len; - atomic_set(&volume->usage, 1); - INIT_LIST_HEAD(&volume->proc_link); + refcount_set(&volume->ref, 1); + INIT_HLIST_NODE(&volume->proc_link); rwlock_init(&volume->servers_lock); rwlock_init(&volume->cb_v_break_lock); memcpy(volume->name, vldb->name, vldb->name_len + 1); @@ -51,17 +100,37 @@ static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, } refcount_set(&slist->usage, 1); - volume->servers = slist; + rcu_assign_pointer(volume->servers, slist); + trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc); return volume; error_1: - afs_put_cell(params->net, volume->cell); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); kfree(volume); error_0: return ERR_PTR(ret); } /* + * Look up or allocate a volume record. + */ +static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, + struct afs_vldb_entry *vldb, + unsigned long type_mask) +{ + struct afs_volume *candidate, *volume; + + candidate = afs_alloc_volume(params, vldb, type_mask); + if (IS_ERR(candidate)) + return candidate; + + volume = afs_insert_volume_into_cell(params->cell, candidate); + if (volume != candidate) + afs_put_volume(params->net, candidate, afs_volume_trace_put_cell_dup); + return volume; +} + +/* * Look up a VLDB record for a volume. */ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, @@ -138,7 +207,7 @@ struct afs_volume *afs_create_volume(struct afs_fs_context *params) } type_mask = 1UL << params->type; - volume = afs_alloc_volume(params, vldb, type_mask); + volume = afs_lookup_volume(params, vldb, type_mask); error: kfree(vldb); @@ -156,42 +225,77 @@ static void afs_destroy_volume(struct afs_net *net, struct afs_volume *volume) ASSERTCMP(volume->cache, ==, NULL); #endif - afs_put_serverlist(net, volume->servers); - afs_put_cell(net, volume->cell); - kfree(volume); + afs_remove_volume_from_cell(volume); + afs_put_serverlist(net, rcu_access_pointer(volume->servers)); + afs_put_cell(volume->cell, afs_cell_trace_put_vol); + trace_afs_volume(volume->vid, refcount_read(&volume->ref), + afs_volume_trace_free); + kfree_rcu(volume, rcu); _leave(" [destroyed]"); } /* - * Drop a reference on a volume record. + * Get a reference on a volume record. */ -void afs_put_volume(struct afs_cell *cell, struct afs_volume *volume) +struct afs_volume *afs_get_volume(struct afs_volume *volume, + enum afs_volume_trace reason) { if (volume) { - _enter("%s", volume->name); + int r; + + __refcount_inc(&volume->ref, &r); + trace_afs_volume(volume->vid, r + 1, reason); + } + return volume; +} + - if (atomic_dec_and_test(&volume->usage)) - afs_destroy_volume(cell->net, volume); +/* + * Drop a reference on a volume record. + */ +void afs_put_volume(struct afs_net *net, struct afs_volume *volume, + enum afs_volume_trace reason) +{ + if (volume) { + afs_volid_t vid = volume->vid; + bool zero; + int r; + + zero = __refcount_dec_and_test(&volume->ref, &r); + trace_afs_volume(vid, r - 1, reason); + if (zero) + afs_destroy_volume(net, volume); } } /* * Activate a volume. */ -void afs_activate_volume(struct afs_volume *volume) +int afs_activate_volume(struct afs_volume *volume) { #ifdef CONFIG_AFS_FSCACHE - volume->cache = fscache_acquire_cookie(volume->cell->cache, - &afs_volume_cache_index_def, - &volume->vid, sizeof(volume->vid), - NULL, 0, - volume, 0, true); + struct fscache_volume *vcookie; + char *name; + + name = kasprintf(GFP_KERNEL, "afs,%s,%llx", + volume->cell->name, volume->vid); + if (!name) + return -ENOMEM; + + vcookie = fscache_acquire_volume(name, NULL, NULL, 0); + if (IS_ERR(vcookie)) { + if (vcookie != ERR_PTR(-EBUSY)) { + kfree(name); + return PTR_ERR(vcookie); + } + pr_err("AFS: Cache volume key already in use (%s)\n", name); + vcookie = NULL; + } + volume->cache = vcookie; + kfree(name); #endif - - write_lock(&volume->cell->proc_lock); - list_add_tail(&volume->proc_link, &volume->cell->proc_volumes); - write_unlock(&volume->cell->proc_lock); + return 0; } /* @@ -201,12 +305,8 @@ void afs_deactivate_volume(struct afs_volume *volume) { _enter("%s", volume->name); - write_lock(&volume->cell->proc_lock); - list_del_init(&volume->proc_link); - write_unlock(&volume->cell->proc_lock); - #ifdef CONFIG_AFS_FSCACHE - fscache_relinquish_cookie(volume->cache, NULL, + fscache_relinquish_volume(volume->cache, NULL, test_bit(AFS_VOLUME_DELETED, &volume->flags)); volume->cache = NULL; #endif @@ -256,17 +356,17 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key) write_lock(&volume->servers_lock); discard = new; - old = volume->servers; + old = rcu_dereference_protected(volume->servers, + lockdep_is_held(&volume->servers_lock)); if (afs_annotate_server_list(new, old)) { new->seq = volume->servers_seq + 1; - volume->servers = new; + rcu_assign_pointer(volume->servers, new); smp_wmb(); volume->servers_seq++; discard = old; } volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; - clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); write_unlock(&volume->servers_lock); ret = 0; @@ -281,25 +381,27 @@ error: /* * Make sure the volume record is up to date. */ -int afs_check_volume_status(struct afs_volume *volume, struct key *key) +int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) { - time64_t now = ktime_get_real_seconds(); int ret, retries = 0; _enter(""); - if (volume->update_at <= now) - set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); - retry: - if (!test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags) && - !test_bit(AFS_VOLUME_WAIT, &volume->flags)) { - _leave(" = 0"); - return 0; - } - + if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) + goto wait; + if (volume->update_at <= ktime_get_real_seconds() || + test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) + goto update; + _leave(" = 0"); + return 0; + +update: if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { - ret = afs_update_volume_status(volume, key); + clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); + ret = afs_update_volume_status(volume, op->key); + if (ret < 0) + set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); @@ -307,12 +409,15 @@ retry: return ret; } +wait: if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { _leave(" = 0 [no wait]"); return 0; } - ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE); + ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, + (op->flags & AFS_OPERATION_UNINTR) ? + TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); if (ret == -ERESTARTSYS) { _leave(" = %d", ret); return ret; diff --git a/fs/afs/write.c b/fs/afs/write.c index cb76566763db..9ebdd36eaf2f 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -11,124 +11,75 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/pagevec.h> +#include <linux/netfs.h> #include "internal.h" +static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len, + loff_t i_size, bool caching); + +#ifdef CONFIG_AFS_FSCACHE /* - * mark a page as having been made dirty and thus needing writeback + * Mark a page as having been made dirty and thus needing writeback. We also + * need to pin the cache object to write back to. */ -int afs_set_page_dirty(struct page *page) +bool afs_dirty_folio(struct address_space *mapping, struct folio *folio) { - _enter(""); - return __set_page_dirty_nobuffers(page); + return fscache_dirty_folio(mapping, folio, + afs_vnode_cache(AFS_FS_I(mapping->host))); } - -/* - * partly or wholly fill a page that's under preparation for writing - */ -static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, unsigned int len, struct page *page) +static void afs_folio_start_fscache(bool caching, struct folio *folio) +{ + if (caching) + folio_start_fscache(folio); +} +#else +static void afs_folio_start_fscache(bool caching, struct folio *folio) { - struct afs_read *req; - size_t p; - void *data; - int ret; - - _enter(",,%llu", (unsigned long long)pos); - - if (pos >= vnode->vfs_inode.i_size) { - p = pos & ~PAGE_MASK; - ASSERTCMP(p + len, <=, PAGE_SIZE); - data = kmap(page); - memset(data + p, 0, len); - kunmap(page); - return 0; - } - - req = kzalloc(struct_size(req, array, 1), GFP_KERNEL); - if (!req) - return -ENOMEM; - - refcount_set(&req->usage, 1); - req->pos = pos; - req->len = len; - req->nr_pages = 1; - req->pages = req->array; - req->pages[0] = page; - get_page(page); - - ret = afs_fetch_data(vnode, key, req); - afs_put_read(req); - if (ret < 0) { - if (ret == -ENOENT) { - _debug("got NOENT from server" - " - marking file deleted and stale"); - set_bit(AFS_VNODE_DELETED, &vnode->flags); - ret = -ESTALE; - } - } - - _leave(" = %d", ret); - return ret; } +#endif /* * prepare to perform part of a write to a page */ int afs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) + loff_t pos, unsigned len, + struct page **_page, void **fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - struct page *page; - struct key *key = afs_file_key(file); + struct folio *folio; unsigned long priv; - unsigned f, from = pos & (PAGE_SIZE - 1); - unsigned t, to = from + len; - pgoff_t index = pos >> PAGE_SHIFT; + unsigned f, from; + unsigned t, to; + pgoff_t index; int ret; - _enter("{%llx:%llu},{%lx},%u,%u", - vnode->fid.vid, vnode->fid.vnode, index, from, to); + _enter("{%llx:%llu},%llx,%x", + vnode->fid.vid, vnode->fid.vnode, pos, len); - /* We want to store information about how much of a page is altered in - * page->private. + /* Prefetch area to be written into the cache if we're caching this + * file. We need to do this before we get a lock on the page in case + * there's more than one writer competing for the same cache block. */ - BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8); - - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - - if (!PageUptodate(page) && len != PAGE_SIZE) { - ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); - if (ret < 0) { - unlock_page(page); - put_page(page); - _leave(" = %d [prep]", ret); - return ret; - } - SetPageUptodate(page); - } + ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata); + if (ret < 0) + return ret; - /* page won't leak in error case: it eventually gets cleaned off LRU */ - *pagep = page; + index = folio_index(folio); + from = pos - index * PAGE_SIZE; + to = from + len; try_again: /* See if this page is already partially written in a way that we can * merge the new write with. */ - t = f = 0; - if (PagePrivate(page)) { - priv = page_private(page); - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + if (folio_test_private(folio)) { + priv = (unsigned long)folio_get_private(folio); + f = afs_folio_dirty_from(folio, priv); + t = afs_folio_dirty_to(folio, priv); ASSERTCMP(f, <=, t); - } - if (f != t) { - if (PageWriteback(page)) { - trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), - page->index, priv); + if (folio_test_writeback(folio)) { + trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio); goto flush_conflicting_write; } /* If the file is being filled locally, allow inter-write @@ -138,21 +89,9 @@ try_again: if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) && (to < f || from > t)) goto flush_conflicting_write; - if (from < f) - f = from; - if (to > t) - t = to; - } else { - f = from; - t = to; } - priv = (unsigned long)t << AFS_PRIV_SHIFT; - priv |= f; - trace_afs_page_dirty(vnode, tracepoint_string("begin"), - page->index, priv); - SetPagePrivate(page); - set_page_private(page, priv); + *_page = folio_file_page(folio, pos / PAGE_SIZE); _leave(" = 0"); return 0; @@ -161,18 +100,19 @@ try_again: */ flush_conflicting_write: _debug("flush conflict"); - ret = write_one_page(page); - if (ret < 0) { - _leave(" = %d", ret); - return ret; - } + ret = folio_write_one(folio); + if (ret < 0) + goto error; - ret = lock_page_killable(page); - if (ret < 0) { - _leave(" = %d", ret); - return ret; - } + ret = folio_lock_killable(folio); + if (ret < 0) + goto error; goto try_again; + +error: + folio_put(folio); + _leave(" = %d", ret); + return ret; } /* @@ -180,90 +120,101 @@ flush_conflicting_write: */ int afs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct page *subpage, void *fsdata) { + struct folio *folio = page_folio(subpage); struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); - struct key *key = afs_file_key(file); - loff_t i_size, maybe_i_size; - int ret; + unsigned long priv; + unsigned int f, from = offset_in_folio(folio, pos); + unsigned int t, to = from + copied; + loff_t i_size, write_end_pos; _enter("{%llx:%llu},{%lx}", - vnode->fid.vid, vnode->fid.vnode, page->index); + vnode->fid.vid, vnode->fid.vnode, folio_index(folio)); - maybe_i_size = pos + copied; + if (!folio_test_uptodate(folio)) { + if (copied < len) { + copied = 0; + goto out; + } - i_size = i_size_read(&vnode->vfs_inode); - if (maybe_i_size > i_size) { - spin_lock(&vnode->wb_lock); - i_size = i_size_read(&vnode->vfs_inode); - if (maybe_i_size > i_size) - i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->wb_lock); + folio_mark_uptodate(folio); } - if (!PageUptodate(page)) { - if (copied < len) { - /* Try and load any missing data from the server. The - * unmarshalling routine will take care of clearing any - * bits that are beyond the EOF. - */ - ret = afs_fill_page(vnode, key, pos + copied, - len - copied, page); - if (ret < 0) - goto out; - } - SetPageUptodate(page); + if (copied == 0) + goto out; + + write_end_pos = pos + copied; + + i_size = i_size_read(&vnode->netfs.inode); + if (write_end_pos > i_size) { + write_seqlock(&vnode->cb_lock); + i_size = i_size_read(&vnode->netfs.inode); + if (write_end_pos > i_size) + afs_set_i_size(vnode, write_end_pos); + write_sequnlock(&vnode->cb_lock); + fscache_update_cookie(afs_vnode_cache(vnode), NULL, &write_end_pos); + } + + if (folio_test_private(folio)) { + priv = (unsigned long)folio_get_private(folio); + f = afs_folio_dirty_from(folio, priv); + t = afs_folio_dirty_to(folio, priv); + if (from < f) + f = from; + if (to > t) + t = to; + priv = afs_folio_dirty(folio, f, t); + folio_change_private(folio, (void *)priv); + trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio); + } else { + priv = afs_folio_dirty(folio, from, to); + folio_attach_private(folio, (void *)priv); + trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio); } - set_page_dirty(page); - if (PageDirty(page)) - _debug("dirtied"); - ret = copied; + if (folio_mark_dirty(folio)) + _debug("dirtied %lx", folio_index(folio)); out: - unlock_page(page); - put_page(page); - return ret; + folio_unlock(folio); + folio_put(folio); + return copied; } /* * kill all the pages in the given range */ static void afs_kill_pages(struct address_space *mapping, - pgoff_t first, pgoff_t last) + loff_t start, loff_t len) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct pagevec pv; - unsigned count, loop; + struct folio *folio; + pgoff_t index = start / PAGE_SIZE; + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; - _enter("{%llx:%llu},%lx-%lx", - vnode->fid.vid, vnode->fid.vnode, first, last); - - pagevec_init(&pv); + _enter("{%llx:%llu},%llx @%llx", + vnode->fid.vid, vnode->fid.vnode, len, start); do { - _debug("kill %lx-%lx", first, last); - - count = last - first + 1; - if (count > PAGEVEC_SIZE) - count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); - ASSERTCMP(pv.nr, ==, count); - - for (loop = 0; loop < count; loop++) { - struct page *page = pv.pages[loop]; - ClearPageUptodate(page); - SetPageError(page); - end_page_writeback(page); - if (page->index >= first) - first = page->index + 1; - lock_page(page); - generic_error_remove_page(mapping, page); - unlock_page(page); + _debug("kill %lx (to %lx)", index, last); + + folio = filemap_get_folio(mapping, index); + if (!folio) { + next = index + 1; + continue; } - __pagevec_release(&pv); - } while (first <= last); + next = folio_next_index(folio); + + folio_clear_uptodate(folio); + folio_end_writeback(folio); + folio_lock(folio); + generic_error_remove_page(mapping, &folio->page); + folio_unlock(folio); + folio_put(folio); + + } while (index = next, index <= last); _leave(""); } @@ -273,37 +224,30 @@ static void afs_kill_pages(struct address_space *mapping, */ static void afs_redirty_pages(struct writeback_control *wbc, struct address_space *mapping, - pgoff_t first, pgoff_t last) + loff_t start, loff_t len) { struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct pagevec pv; - unsigned count, loop; - - _enter("{%llx:%llu},%lx-%lx", - vnode->fid.vid, vnode->fid.vnode, first, last); + struct folio *folio; + pgoff_t index = start / PAGE_SIZE; + pgoff_t last = (start + len - 1) / PAGE_SIZE, next; - pagevec_init(&pv); + _enter("{%llx:%llu},%llx @%llx", + vnode->fid.vid, vnode->fid.vnode, len, start); do { - _debug("redirty %lx-%lx", first, last); + _debug("redirty %llx @%llx", len, start); - count = last - first + 1; - if (count > PAGEVEC_SIZE) - count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(mapping, first, count, pv.pages); - ASSERTCMP(pv.nr, ==, count); - - for (loop = 0; loop < count; loop++) { - struct page *page = pv.pages[loop]; - - redirty_page_for_writepage(wbc, page); - end_page_writeback(page); - if (page->index >= first) - first = page->index + 1; + folio = filemap_get_folio(mapping, index); + if (!folio) { + next = index + 1; + continue; } - __pagevec_release(&pv); - } while (first <= last); + next = index + folio_nr_pages(folio); + folio_redirty_for_writepage(wbc, folio); + folio_end_writeback(folio); + folio_put(folio); + } while (index = next, index <= last); _leave(""); } @@ -311,120 +255,146 @@ static void afs_redirty_pages(struct writeback_control *wbc, /* * completion of write to server */ -static void afs_pages_written_back(struct afs_vnode *vnode, - pgoff_t first, pgoff_t last) +static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len) { - struct pagevec pv; - unsigned long priv; - unsigned count, loop; + struct address_space *mapping = vnode->netfs.inode.i_mapping; + struct folio *folio; + pgoff_t end; - _enter("{%llx:%llu},{%lx-%lx}", - vnode->fid.vid, vnode->fid.vnode, first, last); + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); - pagevec_init(&pv); + _enter("{%llx:%llu},{%x @%llx}", + vnode->fid.vid, vnode->fid.vnode, len, start); - do { - _debug("done %lx-%lx", first, last); - - count = last - first + 1; - if (count > PAGEVEC_SIZE) - count = PAGEVEC_SIZE; - pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, - first, count, pv.pages); - ASSERTCMP(pv.nr, ==, count); - - for (loop = 0; loop < count; loop++) { - priv = page_private(pv.pages[loop]); - trace_afs_page_dirty(vnode, tracepoint_string("clear"), - pv.pages[loop]->index, priv); - set_page_private(pv.pages[loop], 0); - end_page_writeback(pv.pages[loop]); + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (!folio_test_writeback(folio)) { + kdebug("bad %x @%llx page %lx %lx", + len, start, folio_index(folio), end); + ASSERT(folio_test_writeback(folio)); } - first += count; - __pagevec_release(&pv); - } while (first <= last); + + trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio); + folio_detach_private(folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); afs_prune_wb_keys(vnode); _leave(""); } /* - * write to a file + * Find a key to use for the writeback. We cached the keys used to author the + * writes on the vnode. *_wbk will contain the last writeback key used or NULL + * and we need to start from there if it's set. */ -static int afs_store_data(struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to) +static int afs_get_writeback_key(struct afs_vnode *vnode, + struct afs_wb_key **_wbk) { - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct afs_fs_cursor fc; - struct afs_status_cb *scb; struct afs_wb_key *wbk = NULL; struct list_head *p; int ret = -ENOKEY, ret2; - _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x", - vnode->volume->name, - vnode->fid.vid, - vnode->fid.vnode, - vnode->fid.unique, - first, last, offset, to); - - scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); - if (!scb) - return -ENOMEM; - spin_lock(&vnode->wb_lock); - p = vnode->wb_keys.next; + if (*_wbk) + p = (*_wbk)->vnode_link.next; + else + p = vnode->wb_keys.next; - /* Iterate through the list looking for a valid key to use. */ -try_next_key: while (p != &vnode->wb_keys) { wbk = list_entry(p, struct afs_wb_key, vnode_link); _debug("wbk %u", key_serial(wbk->key)); ret2 = key_validate(wbk->key); - if (ret2 == 0) - goto found_key; + if (ret2 == 0) { + refcount_inc(&wbk->usage); + _debug("USE WB KEY %u", key_serial(wbk->key)); + break; + } + + wbk = NULL; if (ret == -ENOKEY) ret = ret2; p = p->next; } spin_unlock(&vnode->wb_lock); - afs_put_wb_key(wbk); - kfree(scb); - _leave(" = %d [no keys]", ret); - return ret; + if (*_wbk) + afs_put_wb_key(*_wbk); + *_wbk = wbk; + return 0; +} -found_key: - refcount_inc(&wbk->usage); - spin_unlock(&vnode->wb_lock); +static void afs_store_data_success(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; - _debug("USE WB KEY %u", key_serial(wbk->key)); + op->ctime = op->file[0].scb.status.mtime_client; + afs_vnode_commit_status(op, &op->file[0]); + if (op->error == 0) { + if (!op->store.laundering) + afs_pages_written_back(vnode, op->store.pos, op->store.size); + afs_stat_v(vnode, n_stores); + atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes); + } +} - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, wbk->key, false)) { - afs_dataversion_t data_version = vnode->status.data_version + 1; +static const struct afs_operation_ops afs_store_data_operation = { + .issue_afs_rpc = afs_fs_store_data, + .issue_yfs_rpc = yfs_fs_store_data, + .success = afs_store_data_success, +}; - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_store_data(&fc, mapping, first, last, offset, to, scb); - } +/* + * write to a file + */ +static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos, + bool laundering) +{ + struct afs_operation *op; + struct afs_wb_key *wbk = NULL; + loff_t size = iov_iter_count(iter); + int ret = -ENOKEY; + + _enter("%s{%llx:%llu.%u},%llx,%llx", + vnode->volume->name, + vnode->fid.vid, + vnode->fid.vnode, + vnode->fid.unique, + size, pos); - afs_check_for_remote_deletion(&fc, vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - if (fc.ac.error == 0) - afs_pages_written_back(vnode, first, last); - ret = afs_end_vnode_operation(&fc); + ret = afs_get_writeback_key(vnode, &wbk); + if (ret) { + _leave(" = %d [no keys]", ret); + return ret; } - switch (ret) { - case 0: - afs_stat_v(vnode, n_stores); - atomic_long_add((last * PAGE_SIZE + to) - - (first * PAGE_SIZE + offset), - &afs_v2net(vnode)->n_store_bytes); - break; + op = afs_alloc_operation(wbk->key, vnode->volume); + if (IS_ERR(op)) { + afs_put_wb_key(wbk); + return -ENOMEM; + } + + afs_op_set_vnode(op, 0, vnode); + op->file[0].dv_delta = 1; + op->file[0].modification = true; + op->store.write_iter = iter; + op->store.pos = pos; + op->store.size = size; + op->store.i_size = max(pos + size, vnode->netfs.remote_i_size); + op->store.laundering = laundering; + op->mtime = vnode->netfs.inode.i_mtime; + op->flags |= AFS_OPERATION_UNINTR; + op->ops = &afs_store_data_operation; + +try_next_key: + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + switch (op->error) { case -EACCES: case -EPERM: case -ENOKEY: @@ -432,156 +402,247 @@ found_key: case -EKEYREJECTED: case -EKEYREVOKED: _debug("next"); - spin_lock(&vnode->wb_lock); - p = wbk->vnode_link.next; - afs_put_wb_key(wbk); - goto try_next_key; + + ret = afs_get_writeback_key(vnode, &wbk); + if (ret == 0) { + key_put(op->key); + op->key = key_get(wbk->key); + goto try_next_key; + } + break; } afs_put_wb_key(wbk); - kfree(scb); - _leave(" = %d", ret); - return ret; + _leave(" = %d", op->error); + return afs_put_operation(op); } /* - * Synchronously write back the locked page and any subsequent non-locked dirty - * pages. + * Extend the region to be written back to include subsequent contiguously + * dirty pages if possible, but don't sleep while doing so. + * + * If this page holds new content, then we can include filler zeros in the + * writeback. */ -static int afs_write_back_from_locked_page(struct address_space *mapping, - struct writeback_control *wbc, - struct page *primary_page, - pgoff_t final_page) +static void afs_extend_writeback(struct address_space *mapping, + struct afs_vnode *vnode, + long *_count, + loff_t start, + loff_t max_len, + bool new_content, + bool caching, + unsigned int *_len) { - struct afs_vnode *vnode = AFS_FS_I(mapping->host); - struct page *pages[8], *page; - unsigned long count, priv; - unsigned n, offset, to, f, t; - pgoff_t start, first, last; - int loop, ret; - - _enter(",%lx", primary_page->index); + struct pagevec pvec; + struct folio *folio; + unsigned long priv; + unsigned int psize, filler = 0; + unsigned int f, t; + loff_t len = *_len; + pgoff_t index = (start + len) / PAGE_SIZE; + bool stop = true; + unsigned int i; - count = 1; - if (test_set_page_writeback(primary_page)) - BUG(); + XA_STATE(xas, &mapping->i_pages, index); + pagevec_init(&pvec); - /* Find all consecutive lockable dirty pages that have contiguous - * written regions, stopping when we find a page that is not - * immediately lockable, is not dirty or is missing, or we reach the - * end of the range. - */ - start = primary_page->index; - priv = page_private(primary_page); - offset = priv & AFS_PRIV_MAX; - to = priv >> AFS_PRIV_SHIFT; - trace_afs_page_dirty(vnode, tracepoint_string("store"), - primary_page->index, priv); - - WARN_ON(offset == to); - if (offset == to) - trace_afs_page_dirty(vnode, tracepoint_string("WARN"), - primary_page->index, priv); - - if (start >= final_page || - (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))) - goto no_more; - - start++; do { - _debug("more %lx [%lx]", start, count); - n = final_page - start + 1; - if (n > ARRAY_SIZE(pages)) - n = ARRAY_SIZE(pages); - n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages); - _debug("fgpc %u", n); - if (n == 0) - goto no_more; - if (pages[0]->index != start) { - do { - put_page(pages[--n]); - } while (n > 0); - goto no_more; - } + /* Firstly, we gather up a batch of contiguous dirty pages + * under the RCU read lock - but we can't clear the dirty flags + * there if any of those pages are mapped. + */ + rcu_read_lock(); - for (loop = 0; loop < n; loop++) { - page = pages[loop]; - if (to != PAGE_SIZE && - !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) + xas_for_each(&xas, folio, ULONG_MAX) { + stop = true; + if (xas_retry(&xas, folio)) + continue; + if (xa_is_value(folio)) break; - if (page->index > final_page) + if (folio_index(folio) != index) break; - if (!trylock_page(page)) + + if (!folio_try_get_rcu(folio)) { + xas_reset(&xas); + continue; + } + + /* Has the page moved or been split? */ + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); break; - if (!PageDirty(page) || PageWriteback(page)) { - unlock_page(page); + } + + if (!folio_trylock(folio)) { + folio_put(folio); + break; + } + if (!folio_test_dirty(folio) || + folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + folio_put(folio); break; } - priv = page_private(page); - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; - if (f != 0 && - !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) { - unlock_page(page); + psize = folio_size(folio); + priv = (unsigned long)folio_get_private(folio); + f = afs_folio_dirty_from(folio, priv); + t = afs_folio_dirty_to(folio, priv); + if (f != 0 && !new_content) { + folio_unlock(folio); + folio_put(folio); break; } - to = t; - trace_afs_page_dirty(vnode, tracepoint_string("store+"), - page->index, priv); + len += filler + t; + filler = psize - t; + if (len >= max_len || *_count <= 0) + stop = true; + else if (t == psize || new_content) + stop = false; - if (!clear_page_dirty_for_io(page)) + index += folio_nr_pages(folio); + if (!pagevec_add(&pvec, &folio->page)) + break; + if (stop) + break; + } + + if (!stop) + xas_pause(&xas); + rcu_read_unlock(); + + /* Now, if we obtained any pages, we can shift them to being + * writable and mark them for caching. + */ + if (!pagevec_count(&pvec)) + break; + + for (i = 0; i < pagevec_count(&pvec); i++) { + folio = page_folio(pvec.pages[i]); + trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio); + + if (!folio_clear_dirty_for_io(folio)) BUG(); - if (test_set_page_writeback(page)) + if (folio_start_writeback(folio)) BUG(); - unlock_page(page); - put_page(page); - } - count += loop; - if (loop < n) { - for (; loop < n; loop++) - put_page(pages[loop]); - goto no_more; + afs_folio_start_fscache(caching, folio); + + *_count -= folio_nr_pages(folio); + folio_unlock(folio); } - start += loop; - } while (start <= final_page && count < 65536); + pagevec_release(&pvec); + cond_resched(); + } while (!stop); + + *_len = len; +} + +/* + * Synchronously write back the locked page and any subsequent non-locked dirty + * pages. + */ +static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping, + struct writeback_control *wbc, + struct folio *folio, + loff_t start, loff_t end) +{ + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct iov_iter iter; + unsigned long priv; + unsigned int offset, to, len, max_len; + loff_t i_size = i_size_read(&vnode->netfs.inode); + bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags); + bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode)); + long count = wbc->nr_to_write; + int ret; + + _enter(",%lx,%llx-%llx", folio_index(folio), start, end); + + if (folio_start_writeback(folio)) + BUG(); + afs_folio_start_fscache(caching, folio); + + count -= folio_nr_pages(folio); + + /* Find all consecutive lockable dirty pages that have contiguous + * written regions, stopping when we find a page that is not + * immediately lockable, is not dirty or is missing, or we reach the + * end of the range. + */ + priv = (unsigned long)folio_get_private(folio); + offset = afs_folio_dirty_from(folio, priv); + to = afs_folio_dirty_to(folio, priv); + trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio); + + len = to - offset; + start += offset; + if (start < i_size) { + /* Trim the write to the EOF; the extra data is ignored. Also + * put an upper limit on the size of a single storedata op. + */ + max_len = 65536 * 4096; + max_len = min_t(unsigned long long, max_len, end - start + 1); + max_len = min_t(unsigned long long, max_len, i_size - start); + + if (len < max_len && + (to == folio_size(folio) || new_content)) + afs_extend_writeback(mapping, vnode, &count, + start, max_len, new_content, + caching, &len); + len = min_t(loff_t, len, max_len); + } -no_more: /* We now have a contiguous set of dirty pages, each with writeback * set; the first page is still locked at this point, but all the rest * have been unlocked. */ - unlock_page(primary_page); + folio_unlock(folio); + + if (start < i_size) { + _debug("write back %x @%llx [%llx]", len, start, i_size); - first = primary_page->index; - last = first + count - 1; + /* Speculatively write to the cache. We have to fix this up + * later if the store fails. + */ + afs_write_to_cache(vnode, start, len, i_size, caching); + + iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len); + ret = afs_store_data(vnode, &iter, start, false); + } else { + _debug("write discard %x @%llx [%llx]", len, start, i_size); - _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); + /* The dirty region was entirely beyond the EOF. */ + fscache_clear_page_bits(mapping, start, len, caching); + afs_pages_written_back(vnode, start, len); + ret = 0; + } - ret = afs_store_data(mapping, first, last, offset, to); switch (ret) { case 0: - ret = count; + wbc->nr_to_write = count; + ret = len; break; default: pr_notice("kAFS: Unexpected error from FS.StoreData %d\n", ret); - /* Fall through */ + fallthrough; case -EACCES: case -EPERM: case -ENOKEY: case -EKEYEXPIRED: case -EKEYREJECTED: case -EKEYREVOKED: - afs_redirty_pages(wbc, mapping, first, last); + case -ENETRESET: + afs_redirty_pages(wbc, mapping, start, len); mapping_set_error(mapping, ret); break; case -EDQUOT: case -ENOSPC: - afs_redirty_pages(wbc, mapping, first, last); + afs_redirty_pages(wbc, mapping, start, len); mapping_set_error(mapping, -ENOSPC); break; @@ -593,7 +654,7 @@ no_more: case -ENOMEDIUM: case -ENXIO: trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail); - afs_kill_pages(mapping, first, last); + afs_kill_pages(mapping, start, len); mapping_set_error(mapping, ret); break; } @@ -606,21 +667,26 @@ no_more: * write a page back to the server * - the caller locked the page for us */ -int afs_writepage(struct page *page, struct writeback_control *wbc) +int afs_writepage(struct page *subpage, struct writeback_control *wbc) { - int ret; + struct folio *folio = page_folio(subpage); + ssize_t ret; + loff_t start; + + _enter("{%lx},", folio_index(folio)); - _enter("{%lx},", page->index); +#ifdef CONFIG_AFS_FSCACHE + folio_wait_fscache(folio); +#endif - ret = afs_write_back_from_locked_page(page->mapping, wbc, page, - wbc->range_end >> PAGE_SHIFT); + start = folio_index(folio) * PAGE_SIZE; + ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc, + folio, start, LLONG_MAX - start); if (ret < 0) { - _leave(" = %d", ret); - return 0; + _leave(" = %zd", ret); + return ret; } - wbc->nr_to_write -= ret; - _leave(" = 0"); return 0; } @@ -630,64 +696,90 @@ int afs_writepage(struct page *page, struct writeback_control *wbc) */ static int afs_writepages_region(struct address_space *mapping, struct writeback_control *wbc, - pgoff_t index, pgoff_t end, pgoff_t *_next) + loff_t start, loff_t end, loff_t *_next) { - struct page *page; - int ret, n; + struct folio *folio; + struct page *head_page; + ssize_t ret; + int n, skips = 0; - _enter(",,%lx,%lx,", index, end); + _enter("%llx,%llx,", start, end); do { - n = find_get_pages_range_tag(mapping, &index, end, - PAGECACHE_TAG_DIRTY, 1, &page); + pgoff_t index = start / PAGE_SIZE; + + n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE, + PAGECACHE_TAG_DIRTY, 1, &head_page); if (!n) break; - _debug("wback %lx", page->index); + folio = page_folio(head_page); + start = folio_pos(folio); /* May regress with THPs */ - /* - * at this point we hold neither the i_pages lock nor the + _debug("wback %lx", folio_index(folio)); + + /* At this point we hold neither the i_pages lock nor the * page lock: the page may be truncated or invalidated * (changing page->mapping to NULL), or even swizzled * back from swapper_space to tmpfs file mapping */ - ret = lock_page_killable(page); - if (ret < 0) { - put_page(page); - _leave(" = %d", ret); - return ret; + if (wbc->sync_mode != WB_SYNC_NONE) { + ret = folio_lock_killable(folio); + if (ret < 0) { + folio_put(folio); + return ret; + } + } else { + if (!folio_trylock(folio)) { + folio_put(folio); + return 0; + } } - if (page->mapping != mapping || !PageDirty(page)) { - unlock_page(page); - put_page(page); + if (folio_mapping(folio) != mapping || + !folio_test_dirty(folio)) { + start += folio_size(folio); + folio_unlock(folio); + folio_put(folio); continue; } - if (PageWriteback(page)) { - unlock_page(page); - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - put_page(page); + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + if (wbc->sync_mode != WB_SYNC_NONE) { + folio_wait_writeback(folio); +#ifdef CONFIG_AFS_FSCACHE + folio_wait_fscache(folio); +#endif + } else { + start += folio_size(folio); + } + folio_put(folio); + if (wbc->sync_mode == WB_SYNC_NONE) { + if (skips >= 5 || need_resched()) + break; + skips++; + } continue; } - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) BUG(); - ret = afs_write_back_from_locked_page(mapping, wbc, page, end); - put_page(page); + ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end); + folio_put(folio); if (ret < 0) { - _leave(" = %d", ret); + _leave(" = %zd", ret); return ret; } - wbc->nr_to_write -= ret; + start += ret; cond_resched(); - } while (index < end && wbc->nr_to_write > 0); + } while (wbc->nr_to_write > 0); - *_next = index; - _leave(" = 0 [%lx]", *_next); + *_next = start; + _leave(" = 0 [%llx]", *_next); return 0; } @@ -697,30 +789,44 @@ static int afs_writepages_region(struct address_space *mapping, int afs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - pgoff_t start, end, next; + struct afs_vnode *vnode = AFS_FS_I(mapping->host); + loff_t start, next; int ret; _enter(""); + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. + */ + if (wbc->sync_mode == WB_SYNC_ALL) + down_read(&vnode->validate_lock); + else if (!down_read_trylock(&vnode->validate_lock)) + return 0; + if (wbc->range_cyclic) { - start = mapping->writeback_index; - end = -1; - ret = afs_writepages_region(mapping, wbc, start, end, &next); - if (start > 0 && wbc->nr_to_write > 0 && ret == 0) - ret = afs_writepages_region(mapping, wbc, 0, start, - &next); - mapping->writeback_index = next; + start = mapping->writeback_index * PAGE_SIZE; + ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = afs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { - end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT); - ret = afs_writepages_region(mapping, wbc, 0, end, &next); - if (wbc->nr_to_write > 0) - mapping->writeback_index = next; + ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); + if (wbc->nr_to_write > 0 && ret == 0) + mapping->writeback_index = next / PAGE_SIZE; } else { - start = wbc->range_start >> PAGE_SHIFT; - end = wbc->range_end >> PAGE_SHIFT; - ret = afs_writepages_region(mapping, wbc, start, end, &next); + ret = afs_writepages_region(mapping, wbc, + wbc->range_start, wbc->range_end, &next); } + up_read(&vnode->validate_lock); _leave(" = %d", ret); return ret; } @@ -731,13 +837,14 @@ int afs_writepages(struct address_space *mapping, ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) { struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); + struct afs_file *af = iocb->ki_filp->private_data; ssize_t result; size_t count = iov_iter_count(from); _enter("{%llx:%llu},{%zu},", vnode->fid.vid, vnode->fid.vnode, count); - if (IS_SWAPFILE(&vnode->vfs_inode)) { + if (IS_SWAPFILE(&vnode->netfs.inode)) { printk(KERN_INFO "AFS: Attempt to write to active swap file!\n"); return -EBUSY; @@ -746,6 +853,10 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) if (!count) return 0; + result = afs_validate(vnode, af->key); + if (result < 0) + return result; + result = generic_file_write_iter(iocb, from); _leave(" = %zd", result); @@ -759,13 +870,18 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) */ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = file_inode(file); - struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct afs_file *af = file->private_data; + int ret; _enter("{%llx:%llu},{n=%pD},%d", vnode->fid.vid, vnode->fid.vnode, file, datasync); + ret = afs_validate(vnode, af->key); + if (ret < 0) + return ret; + return file_write_and_wait_range(file, start, end); } @@ -775,13 +891,17 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) { + struct folio *folio = page_folio(vmf->page); struct file *file = vmf->vma->vm_file; struct inode *inode = file_inode(file); struct afs_vnode *vnode = AFS_FS_I(inode); + struct afs_file *af = file->private_data; unsigned long priv; + vm_fault_t ret = VM_FAULT_RETRY; + + _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio)); - _enter("{{%llx:%llu}},{%lx}", - vnode->fid.vid, vnode->fid.vnode, vmf->page->index); + afs_validate(vnode, af->key); sb_start_pagefault(inode->i_sb); @@ -789,31 +909,41 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) * be modified. We then assume the entire page will need writing back. */ #ifdef CONFIG_AFS_FSCACHE - fscache_wait_on_page_write(vnode->cache, vmf->page); + if (folio_test_fscache(folio) && + folio_wait_fscache_killable(folio) < 0) + goto out; #endif - if (PageWriteback(vmf->page) && - wait_on_page_bit_killable(vmf->page, PG_writeback) < 0) - return VM_FAULT_RETRY; + if (folio_wait_writeback_killable(folio)) + goto out; - if (lock_page_killable(vmf->page) < 0) - return VM_FAULT_RETRY; + if (folio_lock_killable(folio) < 0) + goto out; - /* We mustn't change page->private until writeback is complete as that + /* We mustn't change folio->private until writeback is complete as that * details the portion of the page we need to write back and we might * need to redirty the page if there's a problem. */ - wait_on_page_writeback(vmf->page); + if (folio_wait_writeback_killable(folio) < 0) { + folio_unlock(folio); + goto out; + } - priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */ - priv |= 0; /* From */ - trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), - vmf->page->index, priv); - SetPagePrivate(vmf->page); - set_page_private(vmf->page, priv); + priv = afs_folio_dirty(folio, 0, folio_size(folio)); + priv = afs_folio_dirty_mmapped(priv); + if (folio_test_private(folio)) { + folio_change_private(folio, (void *)priv); + trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio); + } else { + folio_attach_private(folio, (void *)priv); + trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio); + } + file_update_time(file); + ret = VM_FAULT_LOCKED; +out: sb_end_pagefault(inode->i_sb); - return VM_FAULT_LOCKED; + return ret; } /* @@ -827,8 +957,8 @@ void afs_prune_wb_keys(struct afs_vnode *vnode) /* Discard unused keys */ spin_lock(&vnode->wb_lock); - if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) && - !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) { + if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) && + !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) { list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) { if (refcount_read(&wbk->usage) == 1) list_move(&wbk->vnode_link, &graveyard); @@ -847,40 +977,62 @@ void afs_prune_wb_keys(struct afs_vnode *vnode) /* * Clean up a page during invalidation. */ -int afs_launder_page(struct page *page) +int afs_launder_folio(struct folio *folio) { - struct address_space *mapping = page->mapping; - struct afs_vnode *vnode = AFS_FS_I(mapping->host); + struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); + struct iov_iter iter; + struct bio_vec bv[1]; unsigned long priv; unsigned int f, t; int ret = 0; - _enter("{%lx}", page->index); + _enter("{%lx}", folio->index); - priv = page_private(page); - if (clear_page_dirty_for_io(page)) { + priv = (unsigned long)folio_get_private(folio); + if (folio_clear_dirty_for_io(folio)) { f = 0; - t = PAGE_SIZE; - if (PagePrivate(page)) { - f = priv & AFS_PRIV_MAX; - t = priv >> AFS_PRIV_SHIFT; + t = folio_size(folio); + if (folio_test_private(folio)) { + f = afs_folio_dirty_from(folio, priv); + t = afs_folio_dirty_to(folio, priv); } - trace_afs_page_dirty(vnode, tracepoint_string("launder"), - page->index, priv); - ret = afs_store_data(mapping, page->index, page->index, t, f); - } - - trace_afs_page_dirty(vnode, tracepoint_string("laundered"), - page->index, priv); - set_page_private(page, 0); - ClearPagePrivate(page); + bv[0].bv_page = &folio->page; + bv[0].bv_offset = f; + bv[0].bv_len = t - f; + iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len); -#ifdef CONFIG_AFS_FSCACHE - if (PageFsCache(page)) { - fscache_wait_on_page_write(vnode->cache, page); - fscache_uncache_page(vnode->cache, page); + trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio); + ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true); } -#endif + + trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio); + folio_detach_private(folio); + folio_wait_fscache(folio); return ret; } + +/* + * Deal with the completion of writing the data to the cache. + */ +static void afs_write_to_cache_done(void *priv, ssize_t transferred_or_error, + bool was_async) +{ + struct afs_vnode *vnode = priv; + + if (IS_ERR_VALUE(transferred_or_error) && + transferred_or_error != -ENOBUFS) + afs_invalidate_cache(vnode, 0); +} + +/* + * Save the write to the cache also. + */ +static void afs_write_to_cache(struct afs_vnode *vnode, + loff_t start, size_t len, loff_t i_size, + bool caching) +{ + fscache_write_to_cache(afs_vnode_cache(vnode), + vnode->netfs.inode.i_mapping, start, len, i_size, + afs_write_to_cache_done, vnode, caching); +} diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 7af41fd5f3ee..7751b0b3f81d 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -11,29 +11,25 @@ #include <linux/xattr.h> #include "internal.h" -static const char afs_xattr_list[] = - "afs.acl\0" - "afs.cell\0" - "afs.fid\0" - "afs.volume\0" - "afs.yfs.acl\0" - "afs.yfs.acl_inherited\0" - "afs.yfs.acl_num_cleaned\0" - "afs.yfs.vol_acl"; - /* - * Retrieve a list of the supported xattrs. + * Deal with the result of a successful fetch ACL operation. */ -ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size) +static void afs_acl_success(struct afs_operation *op) { - if (size == 0) - return sizeof(afs_xattr_list); - if (size < sizeof(afs_xattr_list)) - return -ERANGE; - memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list)); - return sizeof(afs_xattr_list); + afs_vnode_commit_status(op, &op->file[0]); } +static void afs_acl_put(struct afs_operation *op) +{ + kfree(op->acl); +} + +static const struct afs_operation_ops afs_fetch_acl_operation = { + .issue_afs_rpc = afs_fs_fetch_acl, + .success = afs_acl_success, + .put = afs_acl_put, +}; + /* * Get a file's ACL. */ @@ -42,37 +38,23 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, struct inode *inode, const char *name, void *buffer, size_t size) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); struct afs_acl *acl = NULL; - struct key *key; - int ret = -ENOMEM; - - scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); - if (!scb) - goto error; + int ret; - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; + afs_op_set_vnode(op, 0, vnode); + op->ops = &afs_fetch_acl_operation; - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - acl = afs_fs_fetch_acl(&fc, scb); - } - - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); - } + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + acl = op->acl; + op->acl = NULL; + ret = afs_put_operation(op); if (ret == 0) { ret = acl->size; @@ -82,73 +64,60 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler, else ret = -ERANGE; } - kfree(acl); } - key_put(key); -error_scb: - kfree(scb); -error: + kfree(acl); return ret; } +static bool afs_make_acl(struct afs_operation *op, + const void *buffer, size_t size) +{ + struct afs_acl *acl; + + acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); + if (!acl) { + afs_op_nomem(op); + return false; + } + + acl->size = size; + memcpy(acl->data, buffer, size); + op->acl = acl; + return true; +} + +static const struct afs_operation_ops afs_store_acl_operation = { + .issue_afs_rpc = afs_fs_store_acl, + .success = afs_acl_success, + .put = afs_acl_put, +}; + /* * Set a file's AFS3 ACL. */ static int afs_xattr_set_acl(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_acl *acl = NULL; - struct key *key; - int ret = -ENOMEM; if (flags == XATTR_CREATE) return -EINVAL; - scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); - if (!scb) - goto error; - - acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); - if (!acl) - goto error_scb; - - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_acl; - } - - acl->size = size; - memcpy(acl->data, buffer, size); + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; + afs_op_set_vnode(op, 0, vnode); + if (!afs_make_acl(op, buffer, size)) + return afs_put_operation(op); - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - afs_fs_store_acl(&fc, acl, scb); - } - - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); - } - - key_put(key); -error_acl: - kfree(acl); -error_scb: - kfree(scb); -error: - return ret; + op->ops = &afs_store_acl_operation; + return afs_do_sync_operation(op); } static const struct xattr_handler afs_xattr_afs_acl_handler = { @@ -157,6 +126,12 @@ static const struct xattr_handler afs_xattr_afs_acl_handler = { .set = afs_xattr_set_acl, }; +static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = { + .issue_yfs_rpc = yfs_fs_fetch_opaque_acl, + .success = afs_acl_success, + /* Don't free op->yacl in .put here */ +}; + /* * Get a file's YFS ACL. */ @@ -165,11 +140,9 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, struct inode *inode, const char *name, void *buffer, size_t size) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); struct yfs_acl *yacl = NULL; - struct key *key; char buf[16], *data; int which = 0, dsize, ret = -ENOMEM; @@ -193,132 +166,93 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else if (which == 3) yacl->flags |= YFS_ACL_WANT_VOL_ACL; - scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); - if (!scb) + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) goto error_yacl; - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_scb; - } + afs_op_set_vnode(op, 0, vnode); + op->yacl = yacl; + op->ops = &yfs_fetch_opaque_acl_operation; - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + ret = afs_put_operation(op); - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - yfs_fs_fetch_opaque_acl(&fc, yacl, scb); + if (ret == 0) { + switch (which) { + case 0: + data = yacl->acl->data; + dsize = yacl->acl->size; + break; + case 1: + data = buf; + dsize = scnprintf(buf, sizeof(buf), "%u", yacl->inherit_flag); + break; + case 2: + data = buf; + dsize = scnprintf(buf, sizeof(buf), "%u", yacl->num_cleaned); + break; + case 3: + data = yacl->vol_acl->data; + dsize = yacl->vol_acl->size; + break; + default: + ret = -EOPNOTSUPP; + goto error_yacl; } - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); - } - - if (ret < 0) - goto error_key; - - switch (which) { - case 0: - data = yacl->acl->data; - dsize = yacl->acl->size; - break; - case 1: - data = buf; - dsize = scnprintf(buf, sizeof(buf), "%u", yacl->inherit_flag); - break; - case 2: - data = buf; - dsize = scnprintf(buf, sizeof(buf), "%u", yacl->num_cleaned); - break; - case 3: - data = yacl->vol_acl->data; - dsize = yacl->vol_acl->size; - break; - default: - ret = -EOPNOTSUPP; - goto error_key; - } - - ret = dsize; - if (size > 0) { - if (dsize > size) { - ret = -ERANGE; - goto error_key; + ret = dsize; + if (size > 0) { + if (dsize <= size) + memcpy(buffer, data, dsize); + else + ret = -ERANGE; } - memcpy(buffer, data, dsize); + } else if (ret == -ENOTSUPP) { + ret = -ENODATA; } -error_key: - key_put(key); -error_scb: - kfree(scb); error_yacl: yfs_free_opaque_acl(yacl); error: return ret; } +static const struct afs_operation_ops yfs_store_opaque_acl2_operation = { + .issue_yfs_rpc = yfs_fs_store_opaque_acl2, + .success = afs_acl_success, + .put = afs_acl_put, +}; + /* * Set a file's YFS ACL. */ static int afs_xattr_set_yfs(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags) { - struct afs_fs_cursor fc; - struct afs_status_cb *scb; + struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); - struct afs_acl *acl = NULL; - struct key *key; - int ret = -ENOMEM; + int ret; if (flags == XATTR_CREATE || strcmp(name, "acl") != 0) return -EINVAL; - scb = kzalloc(sizeof(struct afs_status_cb), GFP_NOFS); - if (!scb) - goto error; + op = afs_alloc_operation(NULL, vnode->volume); + if (IS_ERR(op)) + return -ENOMEM; - acl = kmalloc(sizeof(*acl) + size, GFP_KERNEL); - if (!acl) - goto error_scb; + afs_op_set_vnode(op, 0, vnode); + if (!afs_make_acl(op, buffer, size)) + return afs_put_operation(op); - acl->size = size; - memcpy(acl->data, buffer, size); - - key = afs_request_key(vnode->volume->cell); - if (IS_ERR(key)) { - ret = PTR_ERR(key); - goto error_acl; - } - - ret = -ERESTARTSYS; - if (afs_begin_vnode_operation(&fc, vnode, key, true)) { - afs_dataversion_t data_version = vnode->status.data_version; - - while (afs_select_fileserver(&fc)) { - fc.cb_break = afs_calc_vnode_cb_break(vnode); - yfs_fs_store_opaque_acl2(&fc, acl, scb); - } - - afs_check_for_remote_deletion(&fc, fc.vnode); - afs_vnode_commit_status(&fc, vnode, fc.cb_break, - &data_version, scb); - ret = afs_end_vnode_operation(&fc); - } - -error_acl: - kfree(acl); - key_put(key); -error_scb: - kfree(scb); -error: + op->ops = &yfs_store_opaque_acl2_operation; + ret = afs_do_sync_operation(op); + if (ret == -ENOTSUPP) + ret = -ENODATA; return ret; } diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index 94f1f398eefa..8ca868164507 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -54,10 +54,16 @@ union afs_xdr_dirent { __be16 hash_next; __be32 vnode; __be32 unique; - u8 name[16]; - u8 overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ + u8 name[]; + /* When determining the number of dirent slots needed to + * represent a directory entry, name should be assumed to be 16 + * bytes, due to a now-standardised (mis)calculation, but it is + * in fact 20 bytes in size. afs_dir_calc_slots() should be + * used for this. + * + * For names longer than (16 or) 20 bytes, extra slots should + * be annexed to this one using the extended_name format. + */ } u; u8 extended_name[32]; } __packed; @@ -96,4 +102,15 @@ struct afs_xdr_dir_page { union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; }; +/* + * Calculate the number of dirent slots required for any given name length. + * The calculation is made assuming the part of the name in the first slot is + * 16 bytes, rather than 20, but this miscalculation is now standardised. + */ +static inline unsigned int afs_dir_calc_slots(size_t name_len) +{ + name_len++; /* NUL-terminated */ + return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE); +} + #endif /* XDR_FS_H */ diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index a26126ac7bf1..11571cca86c1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -15,13 +15,6 @@ #include "xdr_fs.h" #include "protocol_yfs.h" -static const struct afs_fid afs_zero_fid; - -static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi) -{ - call->cbi = afs_get_cb_interest(cbi); -} - #define xdr_size(x) (sizeof(*x) / sizeof(__be32)) static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid) @@ -79,31 +72,29 @@ static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len) return bp + len / sizeof(__be32); } -static s64 linux_to_yfs_time(const struct timespec64 *t) +static __be32 *xdr_encode_name(__be32 *bp, const struct qstr *p) { - /* Convert to 100ns intervals. */ - return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; + return xdr_encode_string(bp, p->name, p->len); } -static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode) +static s64 linux_to_yfs_time(const struct timespec64 *t) { - struct yfs_xdr_YFSStoreStatus *x = (void *)bp; - - x->mask = htonl(AFS_SET_MODE); - x->mode = htonl(mode & S_IALLUGO); - x->mtime_client = u64_to_xdr(0); - x->owner = u64_to_xdr(0); - x->group = u64_to_xdr(0); - return bp + xdr_size(x); + /* Convert to 100ns intervals. */ + return (u64)t->tv_sec * 10000000 + t->tv_nsec/100; } -static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t) +static __be32 *xdr_encode_YFSStoreStatus(__be32 *bp, mode_t *mode, + const struct timespec64 *t) { struct yfs_xdr_YFSStoreStatus *x = (void *)bp; + mode_t masked_mode = mode ? *mode & S_IALLUGO : 0; s64 mtime = linux_to_yfs_time(t); + u32 mask = AFS_SET_MTIME; + + mask |= mode ? AFS_SET_MODE : 0; - x->mask = htonl(AFS_SET_MTIME); - x->mode = htonl(0); + x->mask = htonl(mask); + x->mode = htonl(masked_mode); x->mtime_client = u64_to_xdr(mtime); x->owner = u64_to_xdr(0); x->group = u64_to_xdr(0); @@ -165,23 +156,23 @@ static void xdr_dump_bad(const __be32 *bp) int i; pr_notice("YFS XDR: Bad status record\n"); - for (i = 0; i < 5 * 4 * 4; i += 16) { + for (i = 0; i < 6 * 4 * 4; i += 16) { memcpy(x, bp, 16); bp += 4; pr_notice("%03x: %08x %08x %08x %08x\n", i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3])); } - memcpy(x, bp, 4); - pr_notice("0x50: %08x\n", ntohl(x[0])); + memcpy(x, bp, 8); + pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1])); } /* * Decode a YFSFetchStatus block */ -static int xdr_decode_YFSFetchStatus(const __be32 **_bp, - struct afs_call *call, - struct afs_status_cb *scb) +static void xdr_decode_YFSFetchStatus(const __be32 **_bp, + struct afs_call *call, + struct afs_status_cb *scb) { const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp; struct afs_file_status *status = &scb->status; @@ -192,7 +183,7 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, if (status->abort_code == VNOVNODE) status->nlink = 0; scb->have_error = true; - return 0; + goto advance; } type = ntohl(xdr->type); @@ -220,13 +211,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp, status->size = xdr_to_u64(xdr->size); status->data_version = xdr_to_u64(xdr->data_version); scb->have_status = true; - +advance: *_bp += xdr_size(xdr); - return 0; + return; bad: xdr_dump_bad(*_bp); - return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status); + afs_protocol_error(call, afs_eproto_bad_status); + goto advance; } /* @@ -240,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp, struct afs_callback *cb = &scb->callback; ktime_t cb_expiry; - cb_expiry = call->reply_time; - cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100); + cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100); cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); scb->have_cb = true; *_bp += xdr_size(x); @@ -331,35 +322,12 @@ static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, } /* - * Deliver a reply that's a status, callback and volsync. - */ -static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call) -{ - const __be32 *bp; - int ret; - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_YFSCallBack(&bp, call, call->out_scb); - xdr_decode_YFSVolSync(&bp, call->out_volsync); - - _leave(" = 0 [done]"); - return 0; -} - -/* * Deliver reply data to operations that just return a file status and a volume * sync record. */ static int yfs_deliver_status_and_volsync(struct afs_call *call) { + struct afs_operation *op = call->op; const __be32 *bp; int ret; @@ -368,91 +336,39 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFetchStatus(&bp, call, &op->file[0].scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; } /* - * YFS.FetchStatus operation type - */ -static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = { - .name = "YFS.FetchStatus(vnode)", - .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_status_cb_and_volsync, - .destructor = afs_flat_call_destructor, -}; - -/* - * Fetch the status information for a file. - */ -int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_status_cb *scb, - struct afs_volsync *volsync) -{ - struct afs_vnode *vnode = fc->vnode; - struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); - __be32 *bp; - - _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - - call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode, - sizeof(__be32) * 2 + - sizeof(struct yfs_xdr_YFSFid), - sizeof(struct yfs_xdr_YFSFetchStatus) + - sizeof(struct yfs_xdr_YFSCallBack) + - sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = volsync; - - /* marshall the parameters */ - bp = call->request; - bp = xdr_encode_u32(bp, YFSFETCHSTATUS); - bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); - yfs_check_req(call, bp); - - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); -} - -/* * Deliver reply data to an YFS.FetchData64. */ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) { - struct afs_read *req = call->read_request; + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; const __be32 *bp; - unsigned int size; int ret; - _enter("{%u,%zu/%llu}", - call->unmarshall, iov_iter_count(call->iter), req->actual_len); + _enter("{%u,%zu, %zu/%llu}", + call->unmarshall, call->iov_len, iov_iter_count(call->iter), + req->actual_len); switch (call->unmarshall) { case 0: req->actual_len = 0; - req->index = 0; - req->offset = req->pos & (PAGE_SIZE - 1); afs_extract_to_tmp64(call); call->unmarshall++; - /* Fall through */ + fallthrough; - /* extract the returned data length */ + /* Extract the returned data length into ->actual_len. This + * may indicate more or less data than was requested will be + * returned. + */ case 1: _debug("extract data length"); ret = afs_extract_data(call, true); @@ -461,53 +377,32 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) req->actual_len = be64_to_cpu(call->tmp64); _debug("DATA length: %llu", req->actual_len); - req->remain = min(req->len, req->actual_len); - if (req->remain == 0) + + if (req->actual_len == 0) goto no_more_data; + call->iter = req->iter; + call->iov_len = min(req->actual_len, req->len); call->unmarshall++; - - begin_page: - ASSERTCMP(req->index, <, req->nr_pages); - if (req->remain > PAGE_SIZE - req->offset) - size = PAGE_SIZE - req->offset; - else - size = req->remain; - call->bvec[0].bv_len = size; - call->bvec[0].bv_offset = req->offset; - call->bvec[0].bv_page = req->pages[req->index]; - iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size); - ASSERTCMP(size, <=, PAGE_SIZE); - /* Fall through */ + fallthrough; /* extract the returned data */ case 2: _debug("extract data %zu/%llu", - iov_iter_count(call->iter), req->remain); + iov_iter_count(call->iter), req->actual_len); ret = afs_extract_data(call, true); if (ret < 0) return ret; - req->remain -= call->bvec[0].bv_len; - req->offset += call->bvec[0].bv_len; - ASSERTCMP(req->offset, <=, PAGE_SIZE); - if (req->offset == PAGE_SIZE) { - req->offset = 0; - if (req->page_done) - req->page_done(req); - req->index++; - if (req->remain > 0) - goto begin_page; - } - ASSERTCMP(req->remain, ==, 0); + call->iter = &call->def_iter; if (req->actual_len <= req->len) goto no_more_data; /* Discard any excess data the server gave us */ afs_extract_discard(call, req->actual_len - req->len); call->unmarshall = 3; - /* Fall through */ + fallthrough; case 3: _debug("extract discard %zu/%llu", @@ -523,7 +418,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); - /* Fall through */ + fallthrough; /* extract the metadata */ case 4: @@ -532,41 +427,24 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_YFSCallBack(&bp, call, call->out_scb); - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); - req->data_version = call->out_scb->status.data_version; - req->file_size = call->out_scb->status.size; + req->data_version = vp->scb.status.data_version; + req->file_size = vp->scb.status.size; call->unmarshall++; - /* Fall through */ + fallthrough; case 5: break; } - for (; req->index < req->nr_pages; req->index++) { - if (req->offset < PAGE_SIZE) - zero_user_segment(req->pages[req->index], - req->offset, PAGE_SIZE); - if (req->page_done) - req->page_done(req); - req->offset = 0; - } - _leave(" = 0 [done]"); return 0; } -static void yfs_fetch_data_destructor(struct afs_call *call) -{ - afs_put_read(call->read_request); - afs_flat_call_destructor(call); -} - /* * YFS.FetchData64 operation type */ @@ -574,25 +452,24 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = { .name = "YFS.FetchData64", .op = yfs_FS_FetchData64, .deliver = yfs_deliver_fs_fetch_data64, - .destructor = yfs_fetch_data_destructor, + .destructor = afs_flat_call_destructor, }; /* * Fetch data from a file. */ -int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb, - struct afs_read *req) +void yfs_fs_fetch_data(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; + struct afs_read *req = op->fetch.req; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%llx:%llu},%llx,%llx", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode, + key_serial(op->key), vp->fid.vid, vp->fid.vnode, req->pos, req->len); - call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(struct yfs_xdr_u64) * 2, @@ -600,27 +477,21 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb, sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; + return afs_op_nomem(op); - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = NULL; - call->read_request = afs_get_read(req); + req->call_debug_id = call->debug_id; /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSFETCHDATA64); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); bp = xdr_encode_u64(bp, req->pos); bp = xdr_encode_u64(bp, req->len); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -628,6 +499,9 @@ int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_status_cb *scb, */ static int yfs_deliver_fs_create_vnode(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -639,15 +513,11 @@ static int yfs_deliver_fs_create_vnode(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_YFSFid(&bp, call->out_fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_YFSCallBack(&bp, call, call->out_scb); - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFid(&bp, &op->file[1].fid); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -666,26 +536,20 @@ static const struct afs_call_type afs_RXFSCreateFile = { /* * Create a file. */ -int yfs_fs_create_file(struct afs_fs_cursor *fc, - const char *name, - umode_t mode, - struct afs_status_cb *dvnode_scb, - struct afs_fid *newfid, - struct afs_status_cb *new_scb) +void yfs_fs_create_file(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); - size_t namesz, reqsz, rplsz; + size_t reqsz, rplsz; __be32 *bp; _enter(""); - namesz = strlen(name); reqsz = (sizeof(__be32) + sizeof(__be32) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz) + + xdr_strlen(name->len) + sizeof(struct yfs_xdr_YFSStoreStatus) + sizeof(__be32)); rplsz = (sizeof(struct yfs_xdr_YFSFid) + @@ -694,30 +558,22 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc, sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); - call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz); + call = afs_alloc_flat_call(op->net, &afs_RXFSCreateFile, reqsz, rplsz); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_fid = newfid; - call->out_scb = new_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSCREATEFILE); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); - bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime); bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */ yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } static const struct afs_call_type yfs_RXFSMakeDir = { @@ -730,26 +586,20 @@ static const struct afs_call_type yfs_RXFSMakeDir = { /* * Make a directory. */ -int yfs_fs_make_dir(struct afs_fs_cursor *fc, - const char *name, - umode_t mode, - struct afs_status_cb *dvnode_scb, - struct afs_fid *newfid, - struct afs_status_cb *new_scb) +void yfs_fs_make_dir(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); - size_t namesz, reqsz, rplsz; + size_t reqsz, rplsz; __be32 *bp; _enter(""); - namesz = strlen(name); reqsz = (sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz) + + xdr_strlen(name->len) + sizeof(struct yfs_xdr_YFSStoreStatus)); rplsz = (sizeof(struct yfs_xdr_YFSFid) + sizeof(struct yfs_xdr_YFSFetchStatus) + @@ -757,29 +607,21 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc, sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); - call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz); + call = afs_alloc_flat_call(op->net, &yfs_RXFSMakeDir, reqsz, rplsz); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_fid = newfid; - call->out_scb = new_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSMAKEDIR); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); - bp = xdr_encode_YFSStoreStatus_mode(bp, mode); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSStoreStatus(bp, &op->create.mode, &op->mtime); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -787,6 +629,9 @@ int yfs_fs_make_dir(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_remove_file2(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; struct afs_fid fid; const __be32 *bp; int ret; @@ -798,20 +643,24 @@ static int yfs_deliver_fs_remove_file2(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); xdr_decode_YFSFid(&bp, &fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); /* Was deleted if vnode->status.abort_code == VNOVNODE. */ - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSVolSync(&bp, &op->volsync); return 0; } +static void yfs_done_fs_remove_file2(struct afs_call *call) +{ + if (call->error == -ECONNABORTED && + call->abort_code == RX_INVALID_OPERATION) { + set_bit(AFS_SERVER_FL_NO_RM2, &call->server->flags); + call->op->flags |= AFS_OPERATION_DOWNGRADE; + } +} + /* * YFS.RemoveFile2 operation type. */ @@ -819,55 +668,44 @@ static const struct afs_call_type yfs_RXYFSRemoveFile2 = { .name = "YFS.RemoveFile2", .op = yfs_FS_RemoveFile2, .deliver = yfs_deliver_fs_remove_file2, + .done = yfs_done_fs_remove_file2, .destructor = afs_flat_call_destructor, }; /* * Remove a file and retrieve new file status. */ -int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, struct afs_status_cb *dvnode_scb, - struct afs_status_cb *vnode_scb) +void yfs_fs_remove_file2(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + struct afs_vnode_param *dvp = &op->file[0]; + const struct qstr *name = &op->dentry->d_name; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); - size_t namesz; __be32 *bp; _enter(""); - namesz = strlen(name); - - call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile2, sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz), + xdr_strlen(name->len), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSFid) + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_scb = vnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSREMOVEFILE2); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -875,6 +713,8 @@ int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_remove(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; const __be32 *bp; int ret; @@ -885,11 +725,8 @@ static int yfs_deliver_fs_remove(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); return 0; } @@ -903,6 +740,43 @@ static const struct afs_call_type yfs_RXYFSRemoveFile = { .destructor = afs_flat_call_destructor, }; +/* + * Remove a file. + */ +void yfs_fs_remove_file(struct afs_operation *op) +{ + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_call *call; + __be32 *bp; + + _enter(""); + + if (!test_bit(AFS_SERVER_FL_NO_RM2, &op->server->flags)) + return yfs_fs_remove_file2(op); + + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveFile, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); + if (!call) + return afs_op_nomem(op); + + /* marshall the parameters */ + bp = call->request; + bp = xdr_encode_u32(bp, YFSREMOVEFILE); + bp = xdr_encode_u32(bp, 0); /* RPC flags */ + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + yfs_check_req(call, bp); + + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); +} + static const struct afs_call_type yfs_RXYFSRemoveDir = { .name = "YFS.RemoveDir", .op = yfs_FS_RemoveDir, @@ -911,48 +785,37 @@ static const struct afs_call_type yfs_RXYFSRemoveDir = { }; /* - * remove a file or directory + * Remove a directory. */ -int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, bool isdir, - struct afs_status_cb *dvnode_scb) +void yfs_fs_remove_dir(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); - size_t namesz; __be32 *bp; _enter(""); - namesz = strlen(name); - call = afs_alloc_flat_call( - net, isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile, - sizeof(__be32) + - sizeof(struct yfs_xdr_RPCFlags) + - sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz), - sizeof(struct yfs_xdr_YFSFetchStatus) + - sizeof(struct yfs_xdr_YFSVolSync)); + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRemoveDir, + sizeof(__be32) + + sizeof(struct yfs_xdr_RPCFlags) + + sizeof(struct yfs_xdr_YFSFid) + + xdr_strlen(name->len), + sizeof(struct yfs_xdr_YFSFetchStatus) + + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; - bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE); + bp = xdr_encode_u32(bp, YFSREMOVEDIR); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -960,6 +823,9 @@ int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_link(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -970,13 +836,9 @@ static int yfs_deliver_fs_link(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; } @@ -994,50 +856,39 @@ static const struct afs_call_type yfs_RXYFSLink = { /* * Make a hard link. */ -int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, - const char *name, - struct afs_status_cb *dvnode_scb, - struct afs_status_cb *vnode_scb) +void yfs_fs_link(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); - size_t namesz; __be32 *bp; _enter(""); - namesz = strlen(name); - call = afs_alloc_flat_call(net, &yfs_RXYFSLink, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSLink, sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz) + + xdr_strlen(name->len) + sizeof(struct yfs_xdr_YFSFid), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_scb = vnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSLINK); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &vnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &vp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1045,6 +896,9 @@ int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode, */ static int yfs_deliver_fs_symlink(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; const __be32 *bp; int ret; @@ -1056,14 +910,10 @@ static int yfs_deliver_fs_symlink(struct afs_call *call) /* unmarshall the reply once we've received all of it */ bp = call->buffer; - xdr_decode_YFSFid(&bp, call->out_fid); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFid(&bp, &vp->fid); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; @@ -1082,28 +932,23 @@ static const struct afs_call_type yfs_RXYFSSymlink = { /* * Create a symbolic link. */ -int yfs_fs_symlink(struct afs_fs_cursor *fc, - const char *name, - const char *contents, - struct afs_status_cb *dvnode_scb, - struct afs_fid *newfid, - struct afs_status_cb *vnode_scb) +void yfs_fs_symlink(struct afs_operation *op) { - struct afs_vnode *dvnode = fc->vnode; + const struct qstr *name = &op->dentry->d_name; + struct afs_vnode_param *dvp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(dvnode); - size_t namesz, contents_sz; + size_t contents_sz; + mode_t mode = 0777; __be32 *bp; _enter(""); - namesz = strlen(name); - contents_sz = strlen(contents); - call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink, + contents_sz = strlen(op->create.symlink); + call = afs_alloc_flat_call(op->net, &yfs_RXYFSSymlink, sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(namesz) + + xdr_strlen(name->len) + xdr_strlen(contents_sz) + sizeof(struct yfs_xdr_YFSStoreStatus), sizeof(struct yfs_xdr_YFSFid) + @@ -1111,28 +956,20 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = dvnode_scb; - call->out_fid = newfid; - call->out_scb = vnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSYMLINK); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &dvnode->fid); - bp = xdr_encode_string(bp, name, namesz); - bp = xdr_encode_string(bp, contents, contents_sz); - bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_name(bp, name); + bp = xdr_encode_string(bp, op->create.symlink, contents_sz); + bp = xdr_encode_YFSStoreStatus(bp, &mode, &op->mtime); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call1(call, &dvnode->fid, name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call1(call, &dvp->fid, name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1140,6 +977,9 @@ int yfs_fs_symlink(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_rename(struct afs_call *call) { + struct afs_operation *op = call->op; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; const __be32 *bp; int ret; @@ -1150,16 +990,12 @@ static int yfs_deliver_fs_rename(struct afs_call *call) return ret; bp = call->buffer; - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb); - if (ret < 0) - return ret; - if (call->out_dir_scb != call->out_scb) { - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - } - - xdr_decode_YFSVolSync(&bp, call->out_volsync); + /* If the two dirs are the same, we have two copies of the same status + * report, so we just decode it twice. + */ + xdr_decode_YFSFetchStatus(&bp, call, &orig_dvp->scb); + xdr_decode_YFSFetchStatus(&bp, call, &new_dvp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); _leave(" = 0 [done]"); return 0; } @@ -1177,55 +1013,42 @@ static const struct afs_call_type yfs_RXYFSRename = { /* * Rename a file or directory. */ -int yfs_fs_rename(struct afs_fs_cursor *fc, - const char *orig_name, - struct afs_vnode *new_dvnode, - const char *new_name, - struct afs_status_cb *orig_dvnode_scb, - struct afs_status_cb *new_dvnode_scb) +void yfs_fs_rename(struct afs_operation *op) { - struct afs_vnode *orig_dvnode = fc->vnode; + struct afs_vnode_param *orig_dvp = &op->file[0]; + struct afs_vnode_param *new_dvp = &op->file[1]; + const struct qstr *orig_name = &op->dentry->d_name; + const struct qstr *new_name = &op->dentry_2->d_name; struct afs_call *call; - struct afs_net *net = afs_v2net(orig_dvnode); - size_t o_namesz, n_namesz; __be32 *bp; _enter(""); - o_namesz = strlen(orig_name); - n_namesz = strlen(new_name); - call = afs_alloc_flat_call(net, &yfs_RXYFSRename, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSRename, sizeof(__be32) + sizeof(struct yfs_xdr_RPCFlags) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(o_namesz) + + xdr_strlen(orig_name->len) + sizeof(struct yfs_xdr_YFSFid) + - xdr_strlen(n_namesz), + xdr_strlen(new_name->len), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_dir_scb = orig_dvnode_scb; - call->out_scb = new_dvnode_scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSRENAME); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid); - bp = xdr_encode_string(bp, orig_name, o_namesz); - bp = xdr_encode_YFSFid(bp, &new_dvnode->fid); - bp = xdr_encode_string(bp, new_name, n_namesz); + bp = xdr_encode_YFSFid(bp, &orig_dvp->fid); + bp = xdr_encode_name(bp, orig_name); + bp = xdr_encode_YFSFid(bp, &new_dvp->fid); + bp = xdr_encode_name(bp, new_name); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call2(call, &orig_dvnode->fid, orig_name, new_name); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call2(call, &orig_dvp->fid, orig_name, new_name); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1241,35 +1064,21 @@ static const struct afs_call_type yfs_RXYFSStoreData64 = { /* * Store a set of pages to a large file. */ -int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, - pgoff_t first, pgoff_t last, - unsigned offset, unsigned to, - struct afs_status_cb *scb) +void yfs_fs_store_data(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); - loff_t size, pos, i_size; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); - - size = (loff_t)to - (loff_t)offset; - if (first != last) - size += (loff_t)(last - first) << PAGE_SHIFT; - pos = (loff_t)first << PAGE_SHIFT; - pos += offset; - - i_size = i_size_read(&vnode->vfs_inode); - if (pos + size > i_size) - i_size = size + pos; + key_serial(op->key), vp->fid.vid, vp->fid.vnode); _debug("size %llx, at %llx, i_size %llx", - (unsigned long long)size, (unsigned long long)pos, - (unsigned long long)i_size); + (unsigned long long)op->store.size, + (unsigned long long)op->store.pos, + (unsigned long long)op->store.i_size); - call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64, sizeof(__be32) + sizeof(__be32) + sizeof(struct yfs_xdr_YFSFid) + @@ -1278,33 +1087,23 @@ int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping, sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; + return afs_op_nomem(op); - call->key = fc->key; - call->mapping = mapping; - call->first = first; - call->last = last; - call->first_offset = offset; - call->last_to = to; - call->send_pages = true; - call->out_scb = scb; + call->write_iter = op->store.write_iter; /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSTOREDATA64); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); - bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime); - bp = xdr_encode_u64(bp, pos); - bp = xdr_encode_u64(bp, size); - bp = xdr_encode_u64(bp, i_size); + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_YFSStoreStatus(bp, NULL, &op->mtime); + bp = xdr_encode_u64(bp, op->store.pos); + bp = xdr_encode_u64(bp, op->store.size); + bp = xdr_encode_u64(bp, op->store.i_size); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1328,18 +1127,17 @@ static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = { * Set the attributes on a file, using YFS.StoreData64 rather than * YFS.StoreStatus so as to alter the file size also. */ -static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, - struct afs_status_cb *scb) +static void yfs_fs_setattr_size(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct iattr *attr = op->setattr.attr; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64_as_Status, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(struct yfs_xdr_YFSStoreStatus) + @@ -1347,72 +1145,59 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr, sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSTOREDATA64); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); bp = xdr_encode_YFS_StoreStatus(bp, attr); bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */ bp = xdr_encode_u64(bp, 0); /* size of write */ bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * Set the attributes on a file, using YFS.StoreData64 if there's a change in * file size, and YFS.StoreStatus otherwise. */ -int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, - struct afs_status_cb *scb) +void yfs_fs_setattr(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct iattr *attr = op->setattr.attr; __be32 *bp; if (attr->ia_valid & ATTR_SIZE) - return yfs_fs_setattr_size(fc, attr, scb); + return yfs_fs_setattr_size(op); _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreStatus, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(struct yfs_xdr_YFSStoreStatus), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSTORESTATUS); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); bp = xdr_encode_YFS_StoreStatus(bp, attr); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1420,6 +1205,7 @@ int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr, */ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) { + struct afs_operation *op = call->op; const __be32 *bp; char *p; u32 size; @@ -1431,7 +1217,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) case 0: call->unmarshall++; afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); - /* Fall through */ + fallthrough; /* extract the returned status record */ case 1: @@ -1441,10 +1227,10 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus); + xdr_decode_YFSFetchVolumeStatus(&bp, &op->volstatus.vs); call->unmarshall++; afs_extract_to_tmp(call); - /* Fall through */ + fallthrough; /* extract the volume name length */ case 2: @@ -1455,12 +1241,11 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_volname_len); + return afs_protocol_error(call, afs_eproto_volname_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the volume name */ case 3: @@ -1474,7 +1259,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) _debug("volname '%s'", p); afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the offline message length */ case 4: @@ -1485,12 +1270,11 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_offline_msg_len); + return afs_protocol_error(call, afs_eproto_offline_msg_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the offline message */ case 5: @@ -1505,7 +1289,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the message of the day length */ case 6: @@ -1516,12 +1300,11 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); if (call->count >= AFSNAMEMAX) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_motd_len); + return afs_protocol_error(call, afs_eproto_motd_len); size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the message of the day */ case 7: @@ -1535,7 +1318,7 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) _debug("motd '%s'", p); call->unmarshall++; - /* Fall through */ + fallthrough; case 8: break; @@ -1558,17 +1341,15 @@ static const struct afs_call_type yfs_RXYFSGetVolumeStatus = { /* * fetch the status of a volume */ -int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, - struct afs_volume_status *vs) +void yfs_fs_get_volume_status(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSGetVolumeStatus, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_u64), max_t(size_t, @@ -1576,23 +1357,17 @@ int yfs_fs_get_volume_status(struct afs_fs_cursor *fc, sizeof(__be32), AFSOPAQUEMAX + 1)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->out_volstatus = vs; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_u64(bp, vnode->fid.vid); + bp = xdr_encode_u64(bp, vp->fid.vid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1630,118 +1405,117 @@ static const struct afs_call_type yfs_RXYFSReleaseLock = { /* * Set a lock on a file */ -int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type, - struct afs_status_cb *scb) +void yfs_fs_set_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSSetLock, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(__be32), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSETLOCK); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); - bp = xdr_encode_u32(bp, type); + bp = xdr_encode_YFSFid(bp, &vp->fid); + bp = xdr_encode_u32(bp, op->lock.type); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_calli(call, &vnode->fid, type); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_calli(call, &vp->fid, op->lock.type); + afs_make_op_call(op, call, GFP_NOFS); } /* * extend a lock on a file */ -int yfs_fs_extend_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) +void yfs_fs_extend_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSExtendLock, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSEXTENDLOCK); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* * release a lock on a file */ -int yfs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) +void yfs_fs_release_lock(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(""); - call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSReleaseLock, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); if (!call) - return -ENOMEM; - - call->key = fc->key; - call->lvnode = vnode; - call->out_scb = scb; + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSRELEASELOCK); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); +} + +/* + * Deliver a reply to YFS.FetchStatus + */ +static int yfs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; } /* @@ -1750,52 +1524,40 @@ int yfs_fs_release_lock(struct afs_fs_cursor *fc, struct afs_status_cb *scb) static const struct afs_call_type yfs_RXYFSFetchStatus = { .name = "YFS.FetchStatus", .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_status_cb_and_volsync, + .deliver = yfs_deliver_fs_fetch_status, .destructor = afs_flat_call_destructor, }; /* * Fetch the status information for a fid without needing a vnode handle. */ -int yfs_fs_fetch_status(struct afs_fs_cursor *fc, - struct afs_net *net, - struct afs_fid *fid, - struct afs_status_cb *scb, - struct afs_volsync *volsync) +void yfs_fs_fetch_status(struct afs_operation *op) { + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; struct afs_call *call; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), fid->vid, fid->vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid), sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = volsync; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSFETCHSTATUS); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, fid); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1803,6 +1565,7 @@ int yfs_fs_fetch_status(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) { + struct afs_operation *op = call->op; struct afs_status_cb *scb; const __be32 *bp; u32 tmp; @@ -1814,7 +1577,7 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the file status count and array in two steps */ case 1: @@ -1824,16 +1587,15 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; tmp = ntohl(call->tmp); - _debug("status count: %u/%u", tmp, call->count2); - if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_ibulkst_count); + _debug("status count: %u/%u", tmp, op->nr_files); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_count); call->count = 0; call->unmarshall++; more_counts: afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); - /* Fall through */ + fallthrough; case 2: _debug("extract status array %u", call->count); @@ -1841,20 +1603,29 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) if (ret < 0) return ret; + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + bp = call->buffer; - scb = &call->out_scb[call->count]; - ret = xdr_decode_YFSFetchStatus(&bp, call, scb); - if (ret < 0) - return ret; + xdr_decode_YFSFetchStatus(&bp, call, scb); call->count++; - if (call->count < call->count2) + if (call->count < op->nr_files) goto more_counts; call->count = 0; call->unmarshall++; afs_extract_to_tmp(call); - /* Fall through */ + fallthrough; /* Extract the callback count and array in two steps */ case 3: @@ -1865,14 +1636,13 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); - if (tmp != call->count2) - return afs_protocol_error(call, -EBADMSG, - afs_eproto_ibulkst_cb_count); + if (tmp != op->nr_files) + return afs_protocol_error(call, afs_eproto_ibulkst_cb_count); call->count = 0; call->unmarshall++; more_cbs: afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); - /* Fall through */ + fallthrough; case 4: _debug("extract CB array"); @@ -1881,16 +1651,27 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; _debug("unmarshall CB array"); + switch (call->count) { + case 0: + scb = &op->file[0].scb; + break; + case 1: + scb = &op->file[1].scb; + break; + default: + scb = &op->more_files[call->count - 2].scb; + break; + } + bp = call->buffer; - scb = &call->out_scb[call->count]; xdr_decode_YFSCallBack(&bp, call, scb); call->count++; - if (call->count < call->count2) + if (call->count < op->nr_files) goto more_cbs; afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); call->unmarshall++; - /* Fall through */ + fallthrough; case 5: ret = afs_extract_data(call, false); @@ -1898,10 +1679,10 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) return ret; bp = call->buffer; - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSVolSync(&bp, &op->volsync); call->unmarshall++; - /* Fall through */ + fallthrough; case 6: break; @@ -1924,50 +1705,39 @@ static const struct afs_call_type yfs_RXYFSInlineBulkStatus = { /* * Fetch the status information for up to 1024 files */ -int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, - struct afs_net *net, - struct afs_fid *fids, - struct afs_status_cb *statuses, - unsigned int nr_fids, - struct afs_volsync *volsync) +void yfs_fs_inline_bulk_status(struct afs_operation *op) { + struct afs_vnode_param *dvp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[1]; struct afs_call *call; __be32 *bp; int i; _enter(",%x,{%llx:%llu},%u", - key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids); + key_serial(op->key), vp->fid.vid, vp->fid.vnode, op->nr_files); - call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSInlineBulkStatus, sizeof(__be32) + sizeof(__be32) + sizeof(__be32) + - sizeof(struct yfs_xdr_YFSFid) * nr_fids, + sizeof(struct yfs_xdr_YFSFid) * op->nr_files, sizeof(struct yfs_xdr_YFSFetchStatus)); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = statuses; - call->out_volsync = volsync; - call->count2 = nr_fids; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS); bp = xdr_encode_u32(bp, 0); /* RPCFlags */ - bp = xdr_encode_u32(bp, nr_fids); - for (i = 0; i < nr_fids; i++) - bp = xdr_encode_YFSFid(bp, &fids[i]); + bp = xdr_encode_u32(bp, op->nr_files); + bp = xdr_encode_YFSFid(bp, &dvp->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); + for (i = 0; i < op->nr_files - 2; i++) + bp = xdr_encode_YFSFid(bp, &op->more_files[i].fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &fids[0]); - afs_set_fc_call(call, fc); - afs_make_call(&fc->ac, call, GFP_NOFS); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_NOFS); } /* @@ -1975,7 +1745,9 @@ int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc, */ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) { - struct yfs_acl *yacl = call->out_yacl; + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[0]; + struct yfs_acl *yacl = op->yacl; struct afs_acl *acl; const __be32 *bp; unsigned int size; @@ -1987,7 +1759,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the file ACL length */ case 1: @@ -2009,7 +1781,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) afs_extract_discard(call, size); } call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the file ACL */ case 2: @@ -2019,7 +1791,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the volume ACL length */ case 3: @@ -2041,7 +1813,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) afs_extract_discard(call, size); } call->unmarshall++; - /* Fall through */ + fallthrough; /* Extract the volume ACL */ case 4: @@ -2054,7 +1826,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); call->unmarshall++; - /* Fall through */ + fallthrough; /* extract the metadata */ case 5: @@ -2065,13 +1837,11 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) bp = call->buffer; yacl->inherit_flag = ntohl(*bp++); yacl->num_cleaned = ntohl(*bp++); - ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb); - if (ret < 0) - return ret; - xdr_decode_YFSVolSync(&bp, call->out_volsync); + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); call->unmarshall++; - /* Fall through */ + fallthrough; case 6: break; @@ -2103,45 +1873,33 @@ static const struct afs_call_type yfs_RXYFSFetchOpaqueACL = { /* * Fetch the YFS advanced ACLs for a file. */ -struct yfs_acl *yfs_fs_fetch_opaque_acl(struct afs_fs_cursor *fc, - struct yfs_acl *yacl, - struct afs_status_cb *scb) +void yfs_fs_fetch_opaque_acl(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); - call = afs_alloc_flat_call(net, &yfs_RXYFSFetchOpaqueACL, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchOpaqueACL, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid), sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) { - fc->ac.error = -ENOMEM; - return ERR_PTR(-ENOMEM); - } - - call->key = fc->key; - call->out_yacl = yacl; - call->out_scb = scb; - call->out_volsync = NULL; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSFETCHOPAQUEACL); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); yfs_check_req(call, bp); - afs_use_fs_server(call, fc->cbi); - trace_afs_make_fs_call(call, &vnode->fid); - afs_make_call(&fc->ac, call, GFP_KERNEL); - return (struct yfs_acl *)afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); } /* @@ -2157,46 +1915,39 @@ static const struct afs_call_type yfs_RXYFSStoreOpaqueACL2 = { /* * Fetch the YFS ACL for a file. */ -int yfs_fs_store_opaque_acl2(struct afs_fs_cursor *fc, const struct afs_acl *acl, - struct afs_status_cb *scb) +void yfs_fs_store_opaque_acl2(struct afs_operation *op) { - struct afs_vnode *vnode = fc->vnode; + struct afs_vnode_param *vp = &op->file[0]; struct afs_call *call; - struct afs_net *net = afs_v2net(vnode); + struct afs_acl *acl = op->acl; size_t size; __be32 *bp; _enter(",%x,{%llx:%llu},,", - key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode); + key_serial(op->key), vp->fid.vid, vp->fid.vnode); size = round_up(acl->size, 4); - call = afs_alloc_flat_call(net, &yfs_RXYFSStoreOpaqueACL2, + call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreOpaqueACL2, sizeof(__be32) * 2 + sizeof(struct yfs_xdr_YFSFid) + sizeof(__be32) + size, sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) { - fc->ac.error = -ENOMEM; - return -ENOMEM; - } - - call->key = fc->key; - call->out_scb = scb; - call->out_volsync = NULL; + if (!call) + return afs_op_nomem(op); /* marshall the parameters */ bp = call->request; bp = xdr_encode_u32(bp, YFSSTOREOPAQUEACL2); bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vnode->fid); + bp = xdr_encode_YFSFid(bp, &vp->fid); bp = xdr_encode_u32(bp, acl->size); memcpy(bp, acl->data, acl->size); if (acl->size != size) memset((void *)bp + acl->size, 0, size - acl->size); + bp += size / sizeof(__be32); yfs_check_req(call, bp); - trace_afs_make_fs_call(call, &vnode->fid); - afs_make_call(&fc->ac, call, GFP_KERNEL); - return afs_wait_for_call_to_complete(call, &fc->ac); + trace_afs_make_fs_call(call, &vp->fid); + afs_make_op_call(op, call, GFP_KERNEL); } |