aboutsummaryrefslogtreecommitdiffstats
path: root/fs/notify/inotify/inotify_user.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify/inotify/inotify_user.c')
-rw-r--r--fs/notify/inotify/inotify_user.c133
1 files changed, 87 insertions, 46 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 107537a543fd..1c4bfdab008d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -37,6 +37,15 @@
#include <asm/ioctls.h>
+/*
+ * An inotify watch requires allocating an inotify_inode_mark structure as
+ * well as pinning the watched inode. Doubling the size of a VFS inode
+ * should be more than enough to cover the additional filesystem inode
+ * size increase.
+ */
+#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \
+ 2 * sizeof(struct inode))
+
/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
@@ -46,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
#include <linux/sysctl.h>
-struct ctl_table inotify_table[] = {
+static long it_zero = 0;
+static long it_int_max = INT_MAX;
+
+static struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_user_watches",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
- .maxlen = sizeof(int),
+ .maxlen = sizeof(long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &it_zero,
+ .extra2 = &it_int_max,
},
{
.procname = "max_queued_events",
@@ -73,31 +87,56 @@ struct ctl_table inotify_table[] = {
},
{ }
};
+
+static void __init inotify_sysctls_init(void)
+{
+ register_sysctl("fs/inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
-static inline __u32 inotify_arg_to_mask(u32 arg)
+static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
{
__u32 mask;
/*
- * everything should accept their own ignored, cares about children,
- * and should receive events when the inode is unmounted
+ * Everything should receive events when the inode is unmounted.
+ * All directories care about children.
*/
- mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
+ mask = (FS_UNMOUNT);
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_EVENT_ON_CHILD;
/* mask off the flags used to open the fd */
- mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
+ mask |= (arg & INOTIFY_USER_MASK);
return mask;
}
+#define INOTIFY_MARK_FLAGS \
+ (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+
+static inline unsigned int inotify_arg_to_flags(u32 arg)
+{
+ unsigned int flags = 0;
+
+ if (arg & IN_EXCL_UNLINK)
+ flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK;
+ if (arg & IN_ONESHOT)
+ flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT;
+
+ return flags;
+}
+
static inline u32 inotify_mask_to_arg(__u32 mask)
{
return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
IN_Q_OVERFLOW);
}
-/* intofiy userspace file descriptor functions */
+/* inotify userspace file descriptor functions */
static __poll_t inotify_poll(struct file *file, poll_table *wait)
{
struct fsnotify_group *group = file->private_data;
@@ -135,10 +174,9 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
size_t event_size = sizeof(struct inotify_event);
struct fsnotify_event *event;
- if (fsnotify_notify_queue_is_empty(group))
- return NULL;
-
event = fsnotify_peek_first_event(group);
+ if (!event)
+ return NULL;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -341,7 +379,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path,
if (error)
return error;
/* you can only watch an inode if you have read permissions on it */
- error = inode_permission(path->dentry->d_inode, MAY_READ);
+ error = path_permission(path, MAY_READ);
if (error) {
path_put(path);
return error;
@@ -484,14 +522,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
- struct fsnotify_iter_info iter_info = { };
-
- fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
- fsn_mark);
/* Queue ignore event for the watch */
- inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL,
- FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info);
+ inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
+ 0);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@@ -507,13 +541,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
struct fsnotify_mark *fsn_mark;
struct inotify_inode_mark *i_mark;
__u32 old_mask, new_mask;
- __u32 mask;
- int add = (arg & IN_MASK_ADD);
+ int replace = !(arg & IN_MASK_ADD);
int create = (arg & IN_MASK_CREATE);
int ret;
- mask = inotify_arg_to_mask(arg);
-
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
return -ENOENT;
@@ -526,10 +557,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
spin_lock(&fsn_mark->lock);
old_mask = fsn_mark->mask;
- if (add)
- fsn_mark->mask |= mask;
- else
- fsn_mark->mask = mask;
+ if (replace) {
+ fsn_mark->mask = 0;
+ fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
+ }
+ fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
+ fsn_mark->flags |= inotify_arg_to_flags(arg);
new_mask = fsn_mark->mask;
spin_unlock(&fsn_mark->lock);
@@ -560,19 +593,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
u32 arg)
{
struct inotify_inode_mark *tmp_i_mark;
- __u32 mask;
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
- mask = inotify_arg_to_mask(arg);
-
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
return -ENOMEM;
fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
- tmp_i_mark->fsn_mark.mask = mask;
+ tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
+ tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
tmp_i_mark->wd = -1;
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
@@ -609,13 +640,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
{
int ret = 0;
- mutex_lock(&group->mark_mutex);
+ fsnotify_group_lock(group);
/* try to update and existing watch with the new arg */
ret = inotify_update_existing_watch(group, inode, arg);
/* no mark present, try to add a new one */
if (ret == -ENOENT)
ret = inotify_new_watch(group, inode, arg);
- mutex_unlock(&group->mark_mutex);
+ fsnotify_group_unlock(group);
return ret;
}
@@ -625,17 +656,18 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
struct fsnotify_group *group;
struct inotify_event_info *oevent;
- group = fsnotify_alloc_group(&inotify_fsnotify_ops);
+ group = fsnotify_alloc_group(&inotify_fsnotify_ops,
+ FSNOTIFY_GROUP_USER);
if (IS_ERR(group))
return group;
- oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL);
+ oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL_ACCOUNT);
if (unlikely(!oevent)) {
fsnotify_destroy_group(group);
return ERR_PTR(-ENOMEM);
}
group->overflow_event = &oevent->fse;
- fsnotify_init_event(group->overflow_event, NULL);
+ fsnotify_init_event(group->overflow_event);
oevent->mask = FS_Q_OVERFLOW;
oevent->wd = -1;
oevent->sync_cookie = 0;
@@ -764,20 +796,18 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
struct fsnotify_group *group;
struct inotify_inode_mark *i_mark;
struct fd f;
- int ret = 0;
+ int ret = -EINVAL;
f = fdget(fd);
if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
- ret = -EINVAL;
if (unlikely(f.file->f_op != &inotify_fops))
goto out;
group = f.file->private_data;
- ret = -EINVAL;
i_mark = inotify_idr_find(group, wd);
if (unlikely(!i_mark))
goto out;
@@ -801,6 +831,18 @@ out:
*/
static int __init inotify_user_setup(void)
{
+ unsigned long watches_max;
+ struct sysinfo si;
+
+ si_meminfo(&si);
+ /*
+ * Allow up to 1% of addressable memory to be allocated for inotify
+ * watches (per user) limited to the range [8192, 1048576].
+ */
+ watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
+ INOTIFY_WATCH_COST;
+ watches_max = clamp(watches_max, 8192UL, 1048576UL);
+
BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@@ -816,9 +858,7 @@ static int __init inotify_user_setup(void)
BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
- BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
- BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
@@ -827,7 +867,8 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
- init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
+ init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
+ inotify_sysctls_init();
return 0;
}