diff options
Diffstat (limited to 'fs/notify/inotify/inotify_user.c')
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 133 |
1 files changed, 87 insertions, 46 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 107537a543fd..1c4bfdab008d 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -37,6 +37,15 @@ #include <asm/ioctls.h> +/* + * An inotify watch requires allocating an inotify_inode_mark structure as + * well as pinning the watched inode. Doubling the size of a VFS inode + * should be more than enough to cover the additional filesystem inode + * size increase. + */ +#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ + 2 * sizeof(struct inode)) + /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; @@ -46,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include <linux/sysctl.h> -struct ctl_table inotify_table[] = { +static long it_zero = 0; +static long it_int_max = INT_MAX; + +static struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_queued_events", @@ -73,31 +87,56 @@ struct ctl_table inotify_table[] = { }, { } }; + +static void __init inotify_sysctls_init(void) +{ + register_sysctl("fs/inotify", inotify_table); +} + +#else +#define inotify_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ -static inline __u32 inotify_arg_to_mask(u32 arg) +static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg) { __u32 mask; /* - * everything should accept their own ignored, cares about children, - * and should receive events when the inode is unmounted + * Everything should receive events when the inode is unmounted. + * All directories care about children. */ - mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); + mask = (FS_UNMOUNT); + if (S_ISDIR(inode->i_mode)) + mask |= FS_EVENT_ON_CHILD; /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); + mask |= (arg & INOTIFY_USER_MASK); return mask; } +#define INOTIFY_MARK_FLAGS \ + (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT) + +static inline unsigned int inotify_arg_to_flags(u32 arg) +{ + unsigned int flags = 0; + + if (arg & IN_EXCL_UNLINK) + flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK; + if (arg & IN_ONESHOT) + flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT; + + return flags; +} + static inline u32 inotify_mask_to_arg(__u32 mask) { return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED | IN_Q_OVERFLOW); } -/* intofiy userspace file descriptor functions */ +/* inotify userspace file descriptor functions */ static __poll_t inotify_poll(struct file *file, poll_table *wait) { struct fsnotify_group *group = file->private_data; @@ -135,10 +174,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, size_t event_size = sizeof(struct inotify_event); struct fsnotify_event *event; - if (fsnotify_notify_queue_is_empty(group)) - return NULL; - event = fsnotify_peek_first_event(group); + if (!event) + return NULL; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -341,7 +379,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = inode_permission(path->dentry->d_inode, MAY_READ); + error = path_permission(path, MAY_READ); if (error) { path_put(path); return error; @@ -484,14 +522,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; - struct fsnotify_iter_info iter_info = { }; - - fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, - fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL, - FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info); + inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL, + 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -507,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; - __u32 mask; - int add = (arg & IN_MASK_ADD); + int replace = !(arg & IN_MASK_ADD); int create = (arg & IN_MASK_CREATE); int ret; - mask = inotify_arg_to_mask(arg); - fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; @@ -526,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, spin_lock(&fsn_mark->lock); old_mask = fsn_mark->mask; - if (add) - fsn_mark->mask |= mask; - else - fsn_mark->mask = mask; + if (replace) { + fsn_mark->mask = 0; + fsn_mark->flags &= ~INOTIFY_MARK_FLAGS; + } + fsn_mark->mask |= inotify_arg_to_mask(inode, arg); + fsn_mark->flags |= inotify_arg_to_flags(arg); new_mask = fsn_mark->mask; spin_unlock(&fsn_mark->lock); @@ -560,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group, u32 arg) { struct inotify_inode_mark *tmp_i_mark; - __u32 mask; int ret; struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - mask = inotify_arg_to_mask(arg); - tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) return -ENOMEM; fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); - tmp_i_mark->fsn_mark.mask = mask; + tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg); + tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg); tmp_i_mark->wd = -1; ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); @@ -609,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod { int ret = 0; - mutex_lock(&group->mark_mutex); + fsnotify_group_lock(group); /* try to update and existing watch with the new arg */ ret = inotify_update_existing_watch(group, inode, arg); /* no mark present, try to add a new one */ if (ret == -ENOENT) ret = inotify_new_watch(group, inode, arg); - mutex_unlock(&group->mark_mutex); + fsnotify_group_unlock(group); return ret; } @@ -625,17 +656,18 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) struct fsnotify_group *group; struct inotify_event_info *oevent; - group = fsnotify_alloc_group(&inotify_fsnotify_ops); + group = fsnotify_alloc_group(&inotify_fsnotify_ops, + FSNOTIFY_GROUP_USER); if (IS_ERR(group)) return group; - oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT); if (unlikely(!oevent)) { fsnotify_destroy_group(group); return ERR_PTR(-ENOMEM); } group->overflow_event = &oevent->fse; - fsnotify_init_event(group->overflow_event, NULL); + fsnotify_init_event(group->overflow_event); oevent->mask = FS_Q_OVERFLOW; oevent->wd = -1; oevent->sync_cookie = 0; @@ -764,20 +796,18 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) struct fsnotify_group *group; struct inotify_inode_mark *i_mark; struct fd f; - int ret = 0; + int ret = -EINVAL; f = fdget(fd); if (unlikely(!f.file)) return -EBADF; /* verify that this is indeed an inotify instance */ - ret = -EINVAL; if (unlikely(f.file->f_op != &inotify_fops)) goto out; group = f.file->private_data; - ret = -EINVAL; i_mark = inotify_idr_find(group, wd); if (unlikely(!i_mark)) goto out; @@ -801,6 +831,18 @@ out: */ static int __init inotify_user_setup(void) { + unsigned long watches_max; + struct sysinfo si; + + si_meminfo(&si); + /* + * Allow up to 1% of addressable memory to be allocated for inotify + * watches (per user) limited to the range [8192, 1048576]. + */ + watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / + INOTIFY_WATCH_COST; + watches_max = clamp(watches_max, 8192UL, 1048576UL); + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); @@ -816,9 +858,7 @@ static int __init inotify_user_setup(void) BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); - BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); BUILD_BUG_ON(IN_ISDIR != FS_ISDIR); - BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22); @@ -827,7 +867,8 @@ static int __init inotify_user_setup(void) inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; - init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; + inotify_sysctls_init(); return 0; } |