From 8003d3c4aaa5560400818e14ce5db49cdfd79865 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 May 2014 06:28:45 -0400 Subject: nfs4: treat lock owners as opaque values Do the following set of ops with a file on a NFSv4 mount: exec 3>>/file/on/nfsv4 flock -x 3 exec 3>&- You'll see the LOCK request go across the wire, but no LOCKU when the file is closed. What happens is that the fd is passed across a fork, and the final close is done in a different process than the opener. That makes __nfs4_find_lock_state miss finding the correct lock state because it uses the fl_pid as a search key. A new one is created, and the locking code treats it as a delegation stateid (because NFS_LOCK_INITIALIZED isn't set). The root cause of this breakage seems to be commit 77041ed9b49a9e (NFSv4: Ensure the lockowners are labelled using the fl_owner and/or fl_pid). That changed it so that flock lockowners are allocated based on the fl_pid. I think this is incorrect. flock locks should be "owned" by the struct file, and that is already accounted for in the fl_owner field of the lock request when it comes through nfs_flock. This patch basically reverts the above commit and with it, a LOCKU is sent in the above reproducer. 
Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 45 +++++++++------------------------------------ 1 file changed, 9 insertions(+), 36 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 848f6853c59e..544040835482 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) + if (pos->ls_owner != fl_owner) continue; - switch (pos->ls_owner.lo_type) { - case NFS4_POSIX_LOCK_TYPE: - if (pos->ls_owner.lo_u.posix_owner != fl_owner) - continue; - break; - case NFS4_FLOCK_LOCK_TYPE: - if (pos->ls_owner.lo_u.flock_owner != fl_pid) - continue; - } atomic_inc(&pos->ls_count); return pos; } @@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p * exists, return an uninitialized one. 
* */ -static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; @@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; - lsp->ls_owner.lo_type = type; - switch (lsp->ls_owner.lo_type) { - case NFS4_FLOCK_LOCK_TYPE: - lsp->ls_owner.lo_u.flock_owner = fl_pid; - break; - case NFS4_POSIX_LOCK_TYPE: - lsp->ls_owner.lo_u.posix_owner = fl_owner; - break; - default: - goto out_free; - } + lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); if (lsp->ls_seqid.owner_id < 0) goto out_free; @@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp * exists, return an uninitialized one. 
* */ -static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { struct nfs4_lock_state *lsp, *new = NULL; for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner, pid, type); + lsp = __nfs4_find_lock_state(state, owner); if (lsp != NULL) break; if (new != NULL) { @@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ break; } spin_unlock(&state->state_lock); - new = nfs4_alloc_lock_state(state, owner, pid, type); + new = nfs4_alloc_lock_state(state, owner); if (new == NULL) return NULL; } @@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_ops != NULL) return 0; - if (fl->fl_flags & FL_POSIX) - lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); - else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, - NFS4_FLOCK_LOCK_TYPE); - else - return -EINVAL; + lsp = nfs4_get_lock_state(state, fl->fl_owner); if (lsp == NULL) return -ENOMEM; fl->fl_u.nfs4_fl.owner = lsp; @@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; - pid_t fl_pid; int ret = -ENOENT; @@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, goto out; fl_owner = lockowner->l_owner; - fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); + lsp = __nfs4_find_lock_state(state, fl_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { -- cgit v1.2.3-59-g8ed1b From 49a4bda22e186c4d0eb07f4a36b5b1a378f9398d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 May 2014 06:28:46 -0400 Subject: nfs4: queue 
free_lock_state job submission to nfsiod

We got a report of the following warning in Fedora:

BUG: sleeping function called from invalid context at mm/slub.c:969
in_atomic(): 1, irqs_disabled(): 0, pid: 533, name: bash
3 locks held by bash/533:
 #0: (&sp->so_delegreturn_mutex){+.+...}, at: [] nfs4_proc_lock+0x262/0x910 [nfsv4]
 #1: (&nfsi->rwsem){.+.+.+}, at: [] nfs4_proc_lock+0x26a/0x910 [nfsv4]
 #2: (&sb->s_type->i_lock_key#23){+.+...}, at: [] flock_lock_file_wait+0x8c/0x3a0
CPU: 0 PID: 533 Comm: bash Not tainted 3.15.0-0.rc1.git1.1.fc21.x86_64 #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0000000000000000 00000000d664ff3c ffff880078b69a70 ffffffff817e82e0
 0000000000000000 ffff880078b69a98 ffffffff810cf1a4 0000000000000050
 0000000000000050 ffff88007cc01a00 ffff880078b69ad8 ffffffff8121449e
Call Trace:
 [] dump_stack+0x4d/0x66
 [] __might_sleep+0x184/0x240
 [] kmem_cache_alloc_trace+0x4e/0x330
 [] ? nfs4_release_lockowner+0x74/0x110 [nfsv4]
 [] nfs4_release_lockowner+0x74/0x110 [nfsv4]
 [] nfs4_put_lock_state+0x90/0xb0 [nfsv4]
 [] nfs4_fl_release_lock+0x15/0x20 [nfsv4]
 [] locks_free_lock+0x45/0x90
 [] flock_lock_file_wait+0x11c/0x3a0
 [] ? nfs4_proc_lock+0x26a/0x910 [nfsv4]
 [] do_vfs_lock+0x1e/0x30 [nfsv4]
 [] nfs4_proc_lock+0x279/0x910 [nfsv4]
 [] ? local_clock+0x16/0x30
 [] ? lock_release_holdtime.part.28+0xf/0x200
 [] do_unlk+0x8c/0xc0 [nfs]
 [] nfs_flock+0xa5/0xf0 [nfs]
 [] locks_remove_file+0xb6/0x1e0
 [] ? kfree+0xd8/0x2d0
 [] __fput+0xd3/0x210
 [] ____fput+0xe/0x10
 [] task_work_run+0xcd/0xf0
 [] do_notify_resume+0x61/0x90
 [] int_signal+0x12/0x17

The problem is that NFSv4 is trying to do an allocation from
fl_release_private (in order to send a RELEASE_LOCKOWNER call). That
function can be called while holding the inode->i_lock, and it's
currently set up to do __GFP_WAIT allocations. v4.1 code has a similar
problem.

This patch adds a work_struct to the nfs4_lock_state and has the code
queue the free_lock_state operation to nfsiod.

Reported-by: Josh Stone Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 13 +++++++------ fs/nfs/nfs4state.c | 24 ++++++++++++++++++------ 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'fs/nfs/nfs4state.c') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 19f567c39670..5e2a8afc72cb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -130,15 +130,16 @@ enum { */ struct nfs4_lock_state { - struct list_head ls_locks; /* Other lock stateids */ - struct nfs4_state * ls_state; /* Pointer to open state */ + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 0 #define NFS_LOCK_LOST 1 - unsigned long ls_flags; + unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; - nfs4_stateid ls_stateid; - atomic_t ls_count; - fl_owner_t ls_owner; + nfs4_stateid ls_stateid; + atomic_t ls_count; + fl_owner_t ls_owner; + struct work_struct ls_release; }; /* bits for nfs4_state->flags */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 544040835482..a770c8e469a7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -799,6 +799,18 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } +static void +free_lock_state_work(struct work_struct *work) +{ + struct nfs4_lock_state *lsp = container_of(work, + struct nfs4_lock_state, ls_release); + struct nfs4_state *state = lsp->ls_state; + struct nfs_server *server = state->owner->so_server; + struct nfs_client *clp = server->nfs_client; + + clp->cl_mvops->free_lock_state(server, lsp); +} + /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. 
@@ -820,6 +832,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); + INIT_WORK(&lsp->ls_release, free_lock_state_work); return lsp; out_free: kfree(lsp); @@ -883,13 +896,12 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - server = state->owner->so_server; - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct nfs_client *clp = server->nfs_client; - - clp->cl_mvops->free_lock_state(server, lsp); - } else + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) + queue_work(nfsiod_workqueue, &lsp->ls_release); + else { + server = state->owner->so_server; nfs4_free_lock_state(server, lsp); + } } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) -- cgit v1.2.3-59-g8ed1b