diff options
Diffstat (limited to 'fs/locks.c')
| -rw-r--r-- | fs/locks.c | 389 | 
1 file changed, 247 insertions, 142 deletions
diff --git a/fs/locks.c b/fs/locks.c index 92a0f0a52b06..13fc7a6d380a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -135,6 +135,7 @@  #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)  #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)  #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG)) +#define IS_FILE_PVT(fl)	(fl->fl_flags & FL_FILE_PVT)  static bool lease_breaking(struct file_lock *fl)  { @@ -344,48 +345,43 @@ static int assign_type(struct file_lock *fl, long type)  	return 0;  } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, -			       struct flock *l) +static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, +				 struct flock64 *l)  { -	off_t start, end; -  	switch (l->l_whence) {  	case SEEK_SET: -		start = 0; +		fl->fl_start = 0;  		break;  	case SEEK_CUR: -		start = filp->f_pos; +		fl->fl_start = filp->f_pos;  		break;  	case SEEK_END: -		start = i_size_read(file_inode(filp)); +		fl->fl_start = i_size_read(file_inode(filp));  		break;  	default:  		return -EINVAL;  	} +	if (l->l_start > OFFSET_MAX - fl->fl_start) +		return -EOVERFLOW; +	fl->fl_start += l->l_start; +	if (fl->fl_start < 0) +		return -EINVAL;  	/* POSIX-1996 leaves the case l->l_len < 0 undefined;  	   POSIX-2001 defines it. 
*/ -	start += l->l_start; -	if (start < 0) -		return -EINVAL; -	fl->fl_end = OFFSET_MAX;  	if (l->l_len > 0) { -		end = start + l->l_len - 1; -		fl->fl_end = end; +		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) +			return -EOVERFLOW; +		fl->fl_end = fl->fl_start + l->l_len - 1; +  	} else if (l->l_len < 0) { -		end = start - 1; -		fl->fl_end = end; -		start += l->l_len; -		if (start < 0) +		if (fl->fl_start + l->l_len < 0)  			return -EINVAL; -	} -	fl->fl_start = start;	/* we record the absolute position */ -	if (fl->fl_end < fl->fl_start) -		return -EOVERFLOW; -	 +		fl->fl_end = fl->fl_start - 1; +		fl->fl_start += l->l_len; +	} else +		fl->fl_end = OFFSET_MAX; +  	fl->fl_owner = current->files;  	fl->fl_pid = current->tgid;  	fl->fl_file = filp; @@ -393,55 +389,36 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,  	fl->fl_ops = NULL;  	fl->fl_lmops = NULL; -	return assign_type(fl, l->l_type); -} - -#if BITS_PER_LONG == 32 -static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, -				 struct flock64 *l) -{ -	loff_t start; - -	switch (l->l_whence) { -	case SEEK_SET: -		start = 0; -		break; -	case SEEK_CUR: -		start = filp->f_pos; +	/* Ensure that fl->fl_filp has compatible f_mode */ +	switch (l->l_type) { +	case F_RDLCK: +		if (!(filp->f_mode & FMODE_READ)) +			return -EBADF;  		break; -	case SEEK_END: -		start = i_size_read(file_inode(filp)); +	case F_WRLCK: +		if (!(filp->f_mode & FMODE_WRITE)) +			return -EBADF;  		break; -	default: -		return -EINVAL;  	} -	start += l->l_start; -	if (start < 0) -		return -EINVAL; -	fl->fl_end = OFFSET_MAX; -	if (l->l_len > 0) { -		fl->fl_end = start + l->l_len - 1; -	} else if (l->l_len < 0) { -		fl->fl_end = start - 1; -		start += l->l_len; -		if (start < 0) -			return -EINVAL; -	} -	fl->fl_start = start;	/* we record the absolute position */ -	if (fl->fl_end < fl->fl_start) -		return -EOVERFLOW; -	 -	fl->fl_owner = current->files; -	fl->fl_pid = current->tgid; -	fl->fl_file = filp; -	
fl->fl_flags = FL_POSIX; -	fl->fl_ops = NULL; -	fl->fl_lmops = NULL; -  	return assign_type(fl, l->l_type);  } -#endif + +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. + */ +static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, +			       struct flock *l) +{ +	struct flock64 ll = { +		.l_type = l->l_type, +		.l_whence = l->l_whence, +		.l_start = l->l_start, +		.l_len = l->l_len, +	}; + +	return flock64_to_posix_lock(filp, fl, &ll); +}  /* default lease lock manager operations */  static void lease_break_callback(struct file_lock *fl) @@ -511,8 +488,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)  }  /* Must be called with the i_lock held! */ -static inline void -locks_insert_global_locks(struct file_lock *fl) +static void locks_insert_global_locks(struct file_lock *fl)  {  	lg_local_lock(&file_lock_lglock);  	fl->fl_link_cpu = smp_processor_id(); @@ -521,8 +497,7 @@ locks_insert_global_locks(struct file_lock *fl)  }  /* Must be called with the i_lock held! */ -static inline void -locks_delete_global_locks(struct file_lock *fl) +static void locks_delete_global_locks(struct file_lock *fl)  {  	/*  	 * Avoid taking lock if already unhashed. This is safe since this check @@ -544,14 +519,12 @@ posix_owner_key(struct file_lock *fl)  	return (unsigned long)fl->fl_owner;  } -static inline void -locks_insert_global_blocked(struct file_lock *waiter) +static void locks_insert_global_blocked(struct file_lock *waiter)  {  	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));  } -static inline void -locks_delete_global_blocked(struct file_lock *waiter) +static void locks_delete_global_blocked(struct file_lock *waiter)  {  	hash_del(&waiter->fl_link);  } @@ -581,7 +554,7 @@ static void locks_delete_block(struct file_lock *waiter)   * it seems like the reasonable thing to do.   *   * Must be called with both the i_lock and blocked_lock_lock held. 
The fl_block - * list itself is protected by the file_lock_list, but by ensuring that the + * list itself is protected by the blocked_lock_lock, but by ensuring that the   * i_lock is also held on insertions we can avoid taking the blocked_lock_lock   * in some cases when we see that the fl_block list is empty.   */ @@ -591,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,  	BUG_ON(!list_empty(&waiter->fl_block));  	waiter->fl_next = blocker;  	list_add_tail(&waiter->fl_block, &blocker->fl_block); -	if (IS_POSIX(blocker)) +	if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker))  		locks_insert_global_blocked(waiter);  } @@ -652,15 +625,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)  	locks_insert_global_locks(fl);  } -/* - * Delete a lock and then free it. - * Wake up processes that are blocked waiting for this lock, - * notify the FS that the lock has been cleared and - * finally free the lock. +/** + * locks_delete_lock - Delete a lock and then free it. + * @thisfl_p: pointer that points to the fl_next field of the previous + * 	      inode->i_flock list entry + * + * Unlink a lock from all lists and free the namespace reference, but don't + * free it yet. Wake up processes that are blocked waiting for this lock and + * notify the FS that the lock has been cleared.   *   * Must be called with the i_lock held!   */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void locks_unlink_lock(struct file_lock **thisfl_p)  {  	struct file_lock *fl = *thisfl_p; @@ -675,6 +651,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p)  	}  	locks_wake_up_blocks(fl); +} + +/* + * Unlink a lock from all lists and free it. + * + * Must be called with i_lock held! 
+ */ +static void locks_delete_lock(struct file_lock **thisfl_p) +{ +	struct file_lock *fl = *thisfl_p; + +	locks_unlink_lock(thisfl_p);  	locks_free_lock(fl);  } @@ -769,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock);   * Note: the above assumption may not be true when handling lock   * requests from a broken NFS client. It may also fail in the presence   * of tasks (such as posix threads) sharing the same open file table. - *   * To handle those cases, we just bail out after a few iterations. + * + * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * Because the owner is not even nominally tied to a thread of + * execution, the deadlock detection below can't reasonably work well. Just + * skip it for those. + * + * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * locks that just checks for the case where two tasks are attempting to + * upgrade from read to write locks on the same inode.   */  #define MAX_DEADLK_ITERATIONS 10 @@ -793,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,  {  	int i = 0; +	/* +	 * This deadlock detector can't reasonably detect deadlocks with +	 * FL_FILE_PVT locks, since they aren't owned by a process, per-se. +	 */ +	if (IS_FILE_PVT(caller_fl)) +		return 0; +  	while ((block_fl = what_owner_is_waiting_for(block_fl))) {  		if (i++ > MAX_DEADLK_ITERATIONS)  			return 0; @@ -1152,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait);  /**   * locks_mandatory_locked - Check for an active lock - * @inode: the file to check + * @file: the file to check   *   * Searches the inode's list of locks to find any POSIX locks which conflict.   * This function is called from locks_verify_locked() only.   
*/ -int locks_mandatory_locked(struct inode *inode) +int locks_mandatory_locked(struct file *file)  { +	struct inode *inode = file_inode(file);  	fl_owner_t owner = current->files;  	struct file_lock *fl; @@ -1169,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode)  	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {  		if (!IS_POSIX(fl))  			continue; -		if (fl->fl_owner != owner) +		if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)  			break;  	}  	spin_unlock(&inode->i_lock); @@ -1195,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode,  {  	struct file_lock fl;  	int error; +	bool sleep = false;  	locks_init_lock(&fl); -	fl.fl_owner = current->files;  	fl.fl_pid = current->tgid;  	fl.fl_file = filp;  	fl.fl_flags = FL_POSIX | FL_ACCESS;  	if (filp && !(filp->f_flags & O_NONBLOCK)) -		fl.fl_flags |= FL_SLEEP; +		sleep = true;  	fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;  	fl.fl_start = offset;  	fl.fl_end = offset + count - 1;  	for (;;) { +		if (filp) { +			fl.fl_owner = (fl_owner_t)filp; +			fl.fl_flags &= ~FL_SLEEP; +			error = __posix_lock_file(inode, &fl, NULL); +			if (!error) +				break; +		} + +		if (sleep) +			fl.fl_flags |= FL_SLEEP; +		fl.fl_owner = current->files;  		error = __posix_lock_file(inode, &fl, NULL);  		if (error != FILE_LOCK_DEFERRED)  			break; @@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp)  	return type;  } +/** + * check_conflicting_open - see if the given dentry points to a file that has + * 			    an existing open that would conflict with the + * 			    desired lease. + * @dentry:	dentry to check + * @arg:	type of lease that we're trying to acquire + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. 
+ */ +static int +check_conflicting_open(const struct dentry *dentry, const long arg) +{ +	int ret = 0; +	struct inode *inode = dentry->d_inode; + +	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) +		return -EAGAIN; + +	if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || +	    (atomic_read(&inode->i_count) > 1))) +		ret = -EAGAIN; + +	return ret; +} +  static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)  {  	struct file_lock *fl, **before, **my_before = NULL, *lease; @@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp  		return -EINVAL;  	} -	error = -EAGAIN; -	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) -		goto out; -	if ((arg == F_WRLCK) -	    && ((d_count(dentry) > 1) -		|| (atomic_read(&inode->i_count) > 1))) +	error = check_conflicting_open(dentry, arg); +	if (error)  		goto out;  	/* @@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp  		goto out;  	locks_insert_lock(before, lease); -	error = 0; +	/* +	 * The check in break_lease() is lockless. It's possible for another +	 * open to race in after we did the earlier check for a conflicting +	 * open but before the lease was inserted. Check again for a +	 * conflicting open and cancel the lease if there is one. +	 * +	 * We also add a barrier here to ensure that the insertion of the lock +	 * precedes these checks. +	 */ +	smp_mb(); +	error = check_conflicting_open(dentry, arg); +	if (error) +		locks_unlink_lock(flp);  out:  	if (is_deleg)  		mutex_unlock(&inode->i_mutex); @@ -1842,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)  { -	flock->l_pid = fl->fl_pid; +	flock->l_pid = IS_FILE_PVT(fl) ? 
-1 : fl->fl_pid;  #if BITS_PER_LONG == 32  	/*  	 * Make sure we can represent the posix lock via @@ -1864,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)  #if BITS_PER_LONG == 32  static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)  { -	flock->l_pid = fl->fl_pid; +	flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;  	flock->l_start = fl->fl_start;  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :  		fl->fl_end - fl->fl_start + 1; @@ -1876,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)  /* Report the first existing lock that would conflict with l.   * This implements the F_GETLK command of fcntl().   */ -int fcntl_getlk(struct file *filp, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)  {  	struct file_lock file_lock;  	struct flock flock; @@ -1893,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)  	if (error)  		goto out; +	if (cmd == F_GETLKP) { +		error = -EINVAL; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_GETLK; +		file_lock.fl_flags |= FL_FILE_PVT; +		file_lock.fl_owner = (fl_owner_t)filp; +	} +  	error = vfs_test_lock(filp, &file_lock);  	if (error)  		goto out; @@ -2012,25 +2071,32 @@ again:  	error = flock_to_posix_lock(filp, file_lock, &flock);  	if (error)  		goto out; -	if (cmd == F_SETLKW) { -		file_lock->fl_flags |= FL_SLEEP; -	} -	 -	error = -EBADF; -	switch (flock.l_type) { -	case F_RDLCK: -		if (!(filp->f_mode & FMODE_READ)) -			goto out; -		break; -	case F_WRLCK: -		if (!(filp->f_mode & FMODE_WRITE)) + +	/* +	 * If the cmd is requesting file-private locks, then set the +	 * FL_FILE_PVT flag and override the owner. 
+	 */ +	switch (cmd) { +	case F_SETLKP: +		error = -EINVAL; +		if (flock.l_pid != 0)  			goto out; + +		cmd = F_SETLK; +		file_lock->fl_flags |= FL_FILE_PVT; +		file_lock->fl_owner = (fl_owner_t)filp;  		break; -	case F_UNLCK: -		break; -	default: +	case F_SETLKPW:  		error = -EINVAL; -		goto out; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_SETLKW; +		file_lock->fl_flags |= FL_FILE_PVT; +		file_lock->fl_owner = (fl_owner_t)filp; +		/* Fallthrough */ +	case F_SETLKW: +		file_lock->fl_flags |= FL_SLEEP;  	}  	error = do_lock_file_wait(filp, cmd, file_lock); @@ -2061,7 +2127,7 @@ out:  /* Report the first existing lock that would conflict with l.   * This implements the F_GETLK command of fcntl().   */ -int fcntl_getlk64(struct file *filp, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)  {  	struct file_lock file_lock;  	struct flock64 flock; @@ -2078,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)  	if (error)  		goto out; +	if (cmd == F_GETLKP) { +		error = -EINVAL; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_GETLK64; +		file_lock.fl_flags |= FL_FILE_PVT; +		file_lock.fl_owner = (fl_owner_t)filp; +	} +  	error = vfs_test_lock(filp, &file_lock);  	if (error)  		goto out; @@ -2130,25 +2206,32 @@ again:  	error = flock64_to_posix_lock(filp, file_lock, &flock);  	if (error)  		goto out; -	if (cmd == F_SETLKW64) { -		file_lock->fl_flags |= FL_SLEEP; -	} -	 -	error = -EBADF; -	switch (flock.l_type) { -	case F_RDLCK: -		if (!(filp->f_mode & FMODE_READ)) -			goto out; -		break; -	case F_WRLCK: -		if (!(filp->f_mode & FMODE_WRITE)) + +	/* +	 * If the cmd is requesting file-private locks, then set the +	 * FL_FILE_PVT flag and override the owner. 
+	 */ +	switch (cmd) { +	case F_SETLKP: +		error = -EINVAL; +		if (flock.l_pid != 0)  			goto out; + +		cmd = F_SETLK64; +		file_lock->fl_flags |= FL_FILE_PVT; +		file_lock->fl_owner = (fl_owner_t)filp;  		break; -	case F_UNLCK: -		break; -	default: +	case F_SETLKPW:  		error = -EINVAL; -		goto out; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_SETLKW64; +		file_lock->fl_flags |= FL_FILE_PVT; +		file_lock->fl_owner = (fl_owner_t)filp; +		/* Fallthrough */ +	case F_SETLKW64: +		file_lock->fl_flags |= FL_SLEEP;  	}  	error = do_lock_file_wait(filp, cmd, file_lock); @@ -2209,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix);  /*   * This function is called on the last close of an open file.   */ -void locks_remove_flock(struct file *filp) +void locks_remove_file(struct file *filp)  {  	struct inode * inode = file_inode(filp);  	struct file_lock *fl; @@ -2218,6 +2301,8 @@ void locks_remove_flock(struct file *filp)  	if (!inode->i_flock)  		return; +	locks_remove_posix(filp, (fl_owner_t)filp); +  	if (filp->f_op->flock) {  		struct file_lock fl = {  			.fl_pid = current->tgid, @@ -2236,16 +2321,28 @@ void locks_remove_flock(struct file *filp)  	while ((fl = *before) != NULL) {  		if (fl->fl_file == filp) { -			if (IS_FLOCK(fl)) { -				locks_delete_lock(before); -				continue; -			}  			if (IS_LEASE(fl)) {  				lease_modify(before, F_UNLCK);  				continue;  			} -			/* What? */ -			BUG(); + +			/* +			 * There's a leftover lock on the list of a type that +			 * we didn't expect to see. Most likely a classic +			 * POSIX lock that ended up not getting released +			 * properly, or that raced onto the list somehow. Log +			 * some info about it and then just remove it from +			 * the list. 
+			 */ +			WARN(!IS_FLOCK(fl), +				"leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", +				MAJOR(inode->i_sb->s_dev), +				MINOR(inode->i_sb->s_dev), inode->i_ino, +				fl->fl_type, fl->fl_flags, +				fl->fl_start, fl->fl_end); + +			locks_delete_lock(before); +			continue;   		}  		before = &fl->fl_next;  	} @@ -2314,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,  	seq_printf(f, "%lld:%s ", id, pfx);  	if (IS_POSIX(fl)) { -		seq_printf(f, "%6s %s ", -			     (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", +		if (fl->fl_flags & FL_ACCESS) +			seq_printf(f, "ACCESS"); +		else if (IS_FILE_PVT(fl)) +			seq_printf(f, "FLPVT "); +		else +			seq_printf(f, "POSIX "); + +		seq_printf(f, " %s ",  			     (inode == NULL) ? "*NOINODE*" :  			     mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");  	} else if (IS_FLOCK(fl)) { @@ -2385,6 +2488,7 @@ static int locks_show(struct seq_file *f, void *v)  }  static void *locks_start(struct seq_file *f, loff_t *pos) +	__acquires(&blocked_lock_lock)  {  	struct locks_iterator *iter = f->private; @@ -2403,6 +2507,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)  }  static void locks_stop(struct seq_file *f, void *v) +	__releases(&blocked_lock_lock)  {  	spin_unlock(&blocked_lock_lock);  	lg_global_unlock(&file_lock_lglock);  | 
