From 826d7bc9f013d01e92997883d2fd0c25f4af1f1c Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 8 Jun 2018 17:27:11 +0300 Subject: fs/lock: skip lock owner pid translation in case we are in init_pid_ns If the flock owner process is dead and its pid has been already freed, pid translation won't work, but we still want to show flock owner pid number when expecting /proc/$PID/fdinfo/$FD in init pidns. Reproducer: process A process A1 process A2 fork()---------> exit() open() flock() fork()---------> exit() sleep() Before the patch: ================ (root@vz7)/: cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 257 lock: (root@vz7)/: After the patch: =============== (root@vz7)/:cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 295 lock: 1: FLOCK ADVISORY WRITE ${PID_A1} b6:f8a61:529946 0 EOF Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Konstantin Khorenko Acked-by: Andrey Vagin Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton --- fs/locks.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index 05e211be8684..bfee5b7f2862 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2072,6 +2072,13 @@ static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns) return -1; if (IS_REMOTELCK(fl)) return fl->fl_pid; + /* + * If the flock owner process is dead and its pid has been already + * freed, the translation below won't work, but we still want to show + * flock owner pid number in init pidns. + */ + if (ns == &init_pid_ns) + return (pid_t)fl->fl_pid; rcu_read_lock(); pid = find_pid_ns(fl->fl_pid, &init_pid_ns); -- cgit v1.2.3-59-g8ed1b From 1cf8e5de4055f85383405a21a0a7c3c4348bf2ed Mon Sep 17 00:00:00 2001 From: Konstantin Khorenko Date: Fri, 8 Jun 2018 17:27:12 +0300 Subject: fs/lock: show locks taken by processes from another pidns Currently if we face a lock taken by a process invisible in the current pidns we skip the lock completely, but this 1) makes the output not that nice (root@vz7)/: cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 257 lock: (root@vz7)/: 2) makes it more difficult to debug issues with leaked flocks if you get error on lock, but don't see any locks in /proc/$id/fdinfo/$file Let's show information about such locks again as previously, but show zero in the owner pid field. After the patch: =============== (root@vz7)/:cat /proc/${PID_A2}/fdinfo/3 pos: 4 flags: 02100002 mnt_id: 295 lock: 1: FLOCK ADVISORY WRITE 0 b6:f8a61:529946 0 EOF Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Konstantin Khorenko Acked-by: Andrey Vagin Reviewed-by: Benjamin Coddington Signed-off-by: Jeff Layton --- fs/locks.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index bfee5b7f2862..e533623e2e99 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2633,12 +2633,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, fl_pid = locks_translate_pid(fl, proc_pidns); /* - * If there isn't a fl_pid don't display who is waiting on - * the lock if we are called from locks_show, or if we are - * called from __show_fd_info - skip lock entirely + * If lock owner is dead (and pid is freed) or not visible in current + * pidns, zero is shown as a pid value. Check lock info from + * init_pid_ns to get saved lock pid value. */ - if (fl_pid == 0) - return; if (fl->fl_file != NULL) inode = locks_inode(fl->fl_file); -- cgit v1.2.3-59-g8ed1b From 8cf9ee5061037accf61775f438ad7513576d4413 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: Revert "vfs: do get_write_access() on upper layer of overlayfs" This reverts commit 4d0c5ba2ff79ef9f5188998b29fd28fcb05f3667. We now get write access on both overlay and underlying layers so this patch is no longer needed for correct operation. Signed-off-by: Miklos Szeredi --- fs/locks.c | 3 +-- fs/open.c | 15 ++------------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index db7b6917d9c5..baa564841c03 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1654,8 +1654,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if ((arg == F_RDLCK) && - (atomic_read(&d_real_inode(dentry)->i_writecount) > 0)) + if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) return -EAGAIN; if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || diff --git a/fs/open.c b/fs/open.c index a973ca074896..72bcae72bce9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, long vfs_truncate(const struct path *path, loff_t length) { struct inode *inode; - struct dentry *upperdentry; long error; inode = path->dentry->d_inode; @@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length) if (IS_APPEND(inode)) goto mnt_drop_write_and_out; - /* - * If this is an overlayfs then do as if opening the file so we get - * write access on the upper inode, not on the overlay inode. For - * non-overlay filesystems d_real() is an identity function. - */ - upperdentry = d_real(path->dentry, NULL, O_WRONLY); - error = PTR_ERR(upperdentry); - if (IS_ERR(upperdentry)) - goto mnt_drop_write_and_out; - - error = get_write_access(upperdentry->d_inode); + error = get_write_access(inode); if (error) goto mnt_drop_write_and_out; @@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length) error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: - put_write_access(upperdentry->d_inode); + put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); out: -- cgit v1.2.3-59-g8ed1b From de2a4a501e716bbf5ff691ba16faf59a35320228 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Jul 2018 15:44:43 +0200 Subject: Partially revert "locks: fix file locking on overlayfs" This partially reverts commit c568d68341be7030f5647def68851e469b21ca11. Overlayfs files will now automatically get the correct locks, no need to hack overlay support in VFS. It is a partial revert, because it leaves the locks_inode() calls in place and defines locks_inode() to file_inode(). We could revert those as well, but it would be unnecessary code churn and it makes sense to document that we are getting the inode for locking purposes. Don't revert MS_NOREMOTELOCK yet since that has been part of the userspace API for some time (though not in a useful way). Will try to remove internal flags later when the dust around the new mount API settles. Signed-off-by: Miklos Szeredi Acked-by: Jeff Layton --- fs/locks.c | 17 ++++++----------- fs/overlayfs/super.c | 2 +- include/linux/fs.h | 13 +------------ 3 files changed, 8 insertions(+), 24 deletions(-) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index baa564841c03..dab4e72f8bff 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -139,11 +139,6 @@ #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) #define IS_REMOTELCK(fl) (fl->fl_pid <= 0) -static inline bool is_remote_lock(struct file *filp) -{ - return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK)); -} - static bool lease_breaking(struct file_lock *fl) { return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); @@ -1875,7 +1870,7 @@ EXPORT_SYMBOL(generic_setlease); int vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) { - if (filp->f_op->setlease && is_remote_lock(filp)) + if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease, priv); else return generic_setlease(filp, arg, lease, priv); @@ -2022,7 +2017,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op->flock && is_remote_lock(f.file)) + if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); @@ -2048,7 +2043,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock && is_remote_lock(filp)) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -2191,7 +2186,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op->lock && is_remote_lock(filp)) + if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -2513,7 +2508,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx) if (list_empty(&flctx->flc_flock)) return; - if (filp->f_op->flock && is_remote_lock(filp)) + if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else flock_lock_inode(inode, &fl); @@ -2600,7 +2595,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock && is_remote_lock(filp)) + if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5bc261de5041..c63beccad4fc 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1456,7 +1456,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ovl_super_operations; sb->s_xattr = ovl_xattr_handlers; sb->s_fs_info = ofs; - sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; + sb->s_flags |= SB_POSIXACL; err = -ENOMEM; root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 16e2741cec3c..1cbcf37c45e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1054,17 +1054,7 @@ struct file_lock_context { extern void send_sigio(struct fown_struct *fown, int fd, int band); -/* - * Return the inode to use for locking - * - * For overlayfs this should be the overlay inode, not the real inode returned - * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are - * equal. - */ -static inline struct inode *locks_inode(const struct file *f) -{ - return f->f_path.dentry->d_inode; -} +#define locks_inode(f) file_inode(f) #ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, unsigned int, struct flock *); @@ -1305,7 +1295,6 @@ extern int send_sigurg(struct fown_struct *fown); /* These sb flags are internal to the kernel */ #define SB_SUBMOUNT (1<<26) -#define SB_NOREMOTELOCK (1<<27) #define SB_NOSEC (1<<28) #define SB_BORN (1<<29) #define SB_ACTIVE (1<<30) -- cgit v1.2.3-59-g8ed1b From 019191342fecce4a461978a7191a43f313e19e86 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 22:05:57 -0500 Subject: signal: Use PIDTYPE_TGID to clearly store where file signals will be sent When f_setown is called a pid and a pid type are stored. Replace the use of PIDTYPE_PID with PIDTYPE_TGID as PIDTYPE_TGID goes to the entire thread group. Replace the use of PIDTYPE_MAX with PIDTYPE_PID as PIDTYPE_PID now is only for a thread. Update the users of __f_setown to use PIDTYPE_TGID instead of PIDTYPE_PID. For now the code continues to capture task_pid (when task_tgid would really be appropriate), and iterate on PIDTYPE_PID (even when type == PIDTYPE_TGID) out of an abundance of caution to preserve existing behavior. Oleg Nesterov suggested using the test to ensure we use PIDTYPE_PID for tgid lookup also be used to avoid taking the tasklist lock. Suggested-by: Oleg Nesterov Signed-off-by: "Eric W. Biederman" --- drivers/net/tun.c | 2 +- drivers/tty/tty_io.c | 2 +- fs/fcntl.c | 54 +++++++++++++++++++++++++++------------------ fs/locks.c | 2 +- fs/notify/dnotify/dnotify.c | 3 ++- 5 files changed, 37 insertions(+), 26 deletions(-) (limited to 'fs/locks.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a192a017cc68..9958b70ac1b0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3216,7 +3216,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on) goto out; if (on) { - __f_setown(file, task_pid(current), PIDTYPE_PID, 0); + __f_setown(file, task_pid(current), PIDTYPE_TGID, 0); tfile->flags |= TUN_FASYNC; } else tfile->flags &= ~TUN_FASYNC; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index aba59521ad48..090fb7e78eea 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2122,7 +2122,7 @@ static int __tty_fasync(int fd, struct file *filp, int on) type = PIDTYPE_PGID; } else { pid = task_pid(current); - type = PIDTYPE_PID; + type = PIDTYPE_TGID; } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl_lock, flags); diff --git a/fs/fcntl.c b/fs/fcntl.c index 12273b6ea56d..1523588fd759 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -116,7 +116,7 @@ int f_setown(struct file *filp, unsigned long arg, int force) struct pid *pid = NULL; int who = arg, ret = 0; - type = PIDTYPE_PID; + type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ if (who == INT_MIN) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(f_setown); void f_delown(struct file *filp) { - f_modown(filp, NULL, PIDTYPE_PID, 1); + f_modown(filp, NULL, PIDTYPE_TGID, 1); } pid_t f_getown(struct file *filp) @@ -171,11 +171,11 @@ static int f_setown_ex(struct file *filp, unsigned long arg) switch (owner.type) { case F_OWNER_TID: - type = PIDTYPE_MAX; + type = PIDTYPE_PID; break; case F_OWNER_PID: - type = PIDTYPE_PID; + type = PIDTYPE_TGID; break; case F_OWNER_PGRP: @@ -206,11 +206,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg) read_lock(&filp->f_owner.lock); owner.pid = pid_vnr(filp->f_owner.pid); switch (filp->f_owner.pid_type) { - case PIDTYPE_MAX: + case PIDTYPE_PID: owner.type = F_OWNER_TID; break; - case PIDTYPE_PID: + case PIDTYPE_TGID: owner.type = F_OWNER_PID; break; @@ -785,20 +785,25 @@ void send_sigio(struct fown_struct *fown, int fd, int band) read_lock(&fown->lock); type = fown->pid_type; - if (type == PIDTYPE_MAX) { + if (type == PIDTYPE_PID) group = 0; - type = PIDTYPE_PID; - } pid = fown->pid; if (!pid) goto out_unlock_fown; - - read_lock(&tasklist_lock); - do_each_pid_task(pid, type, p) { + + if (type <= PIDTYPE_TGID) { + rcu_read_lock(); + p = pid_task(pid, PIDTYPE_PID); send_sigio_to_task(p, fown, fd, band, group); - } while_each_pid_task(pid, type, p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); + } else { + read_lock(&tasklist_lock); + do_each_pid_task(pid, type, p) { + send_sigio_to_task(p, fown, fd, band, group); + } while_each_pid_task(pid, type, p); + read_unlock(&tasklist_lock); + } out_unlock_fown: read_unlock(&fown->lock); } @@ -821,22 +826,27 @@ int send_sigurg(struct fown_struct *fown) read_lock(&fown->lock); type = fown->pid_type; - if (type == PIDTYPE_MAX) { + if (type == PIDTYPE_PID) group = 0; - type = PIDTYPE_PID; - } pid = fown->pid; if (!pid) goto out_unlock_fown; ret = 1; - - read_lock(&tasklist_lock); - do_each_pid_task(pid, type, p) { + + if (type <= PIDTYPE_TGID) { + rcu_read_lock(); + p = pid_task(pid, PIDTYPE_PID); send_sigurg_to_task(p, fown, group); - } while_each_pid_task(pid, type, p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); + } else { + read_lock(&tasklist_lock); + do_each_pid_task(pid, type, p) { + send_sigurg_to_task(p, fown, group); + } while_each_pid_task(pid, type, p); + read_unlock(&tasklist_lock); + } out_unlock_fown: read_unlock(&fown->lock); return ret; diff --git a/fs/locks.c b/fs/locks.c index db7b6917d9c5..cfc059bda8ea 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -546,7 +546,7 @@ lease_setup(struct file_lock *fl, void **priv) if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa)) *priv = NULL; - __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); } static const struct lock_manager_operations lease_manager_ops = { diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index e2bea2ac5dfb..484f2c3a33bb 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -353,7 +354,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) goto out; } - __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); error = attach_dn(dn, dn_mark, id, fd, filp, mask); /* !error means that we attached the dn to the dn_mark, so don't free it */ -- cgit v1.2.3-59-g8ed1b From c883da313ebf459efd33d262ca963e3a5f0ac024 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jul 2018 07:54:56 -0400 Subject: locks: add tracepoint in flock codepath Signed-off-by: Jeff Layton --- fs/locks.c | 1 + include/trace/events/filelock.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index e533623e2e99..6138a9bcd924 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -990,6 +990,7 @@ out: if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); + trace_flock_lock_inode(inode, request, error); return error; } diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index d1faf3597b9d..68b17c116907 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -112,8 +112,11 @@ DEFINE_EVENT(filelock_lock, locks_remove_posix, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); -DECLARE_EVENT_CLASS(filelock_lease, +DEFINE_EVENT(filelock_lock, flock_lock_inode, + TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), + TP_ARGS(inode, fl, ret)); +DECLARE_EVENT_CLASS(filelock_lease, TP_PROTO(struct inode *inode, struct file_lock *fl), TP_ARGS(inode, fl), -- cgit v1.2.3-59-g8ed1b From da33a871ba178dbe81da7d755818d3c2088cae32 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 8 Aug 2018 12:54:09 -0400 Subject: locks: remove misleading obsolete comment The spinlock handling in this file has changed significantly since this comment was written, and the file_lock_lock is no more. In addition, this overall comment no longer applies. Deleting an entry now requires both locks. Signed-off-by: Jeff Layton --- fs/locks.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/locks.c') diff --git a/fs/locks.c b/fs/locks.c index 6138a9bcd924..11a4d698aba8 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -202,10 +202,6 @@ static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. - * - * In particular, adding an entry to the fl_block list requires that you hold - * both the flc_lock and the blocked_lock_lock (acquired in that order). - * Deleting an entry from the list however only requires the file_lock_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); -- cgit v1.2.3-59-g8ed1b