From 2d9048e201bfb67ba21f05e647b1286b8a4a5667 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 1 Jun 2006 13:10:59 -0700 Subject: [PATCH] inotify (1/5): split kernel API from userspace support The following series of patches introduces a kernel API for inotify, making it possible for kernel modules to benefit from inotify's mechanism for watching inodes. With these patches, inotify will maintain for each caller a list of watches (via an embedded struct inotify_watch), where each inotify_watch is associated with a corresponding struct inode. The caller registers an event handler and specifies for which filesystem events their event handler should be called per inotify_watch. Signed-off-by: Amy Griffis Acked-by: Robert Love Acked-by: John McCutchan Signed-off-by: Al Viro --- fs/Kconfig | 24 +- fs/Makefile | 1 + fs/inotify.c | 941 +++++++++++++----------------------------------------- fs/inotify_user.c | 717 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 966 insertions(+), 717 deletions(-) create mode 100644 fs/inotify_user.c (limited to 'fs') diff --git a/fs/Kconfig b/fs/Kconfig index f9b5842c8d2d..74f11a23622d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -393,18 +393,30 @@ config INOTIFY bool "Inotify file change notification support" default y ---help--- - Say Y here to enable inotify support and the associated system - calls. Inotify is a file change notification system and a - replacement for dnotify. Inotify fixes numerous shortcomings in - dnotify and introduces several new features. It allows monitoring - of both files and directories via a single open fd. Other features - include multiple file events, one-shot support, and unmount + Say Y here to enable inotify support. Inotify is a file change + notification system and a replacement for dnotify. Inotify fixes + numerous shortcomings in dnotify and introduces several new features + including multiple file events, one-shot support, and unmount notification. For more information, see Documentation/filesystems/inotify.txt If unsure, say Y. +config INOTIFY_USER + bool "Inotify support for userspace" + depends on INOTIFY + default y + ---help--- + Say Y here to enable inotify support for userspace, including the + associated system calls. Inotify allows monitoring of both files and + directories via a single open fd. Events are read from the file + descriptor, which is also select()- and poll()-able. + + For more information, see Documentation/filesystems/inotify.txt + + If unsure, say Y. + config QUOTA bool "Quota support" help diff --git a/fs/Makefile b/fs/Makefile index 078d3d1191a5..d0ea6bfccf29 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -13,6 +13,7 @@ obj-y := open.o read_write.o file_table.o buffer.o bio.o super.o \ ioprio.o pnode.o drop_caches.o splice.o sync.o obj-$(CONFIG_INOTIFY) += inotify.o +obj-$(CONFIG_INOTIFY_USER) += inotify_user.o obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o diff --git a/fs/inotify.c b/fs/inotify.c index 732ec4bd5774..a1bedf3975ca 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -5,7 +5,10 @@ * John McCutchan * Robert Love * + * Kernel API added by: Amy Griffis + * * Copyright (C) 2005 John McCutchan + * Copyright 2006 Hewlett-Packard Development Company, L.P. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -20,35 +23,17 @@ #include #include -#include #include #include #include #include -#include -#include -#include -#include #include #include #include #include -#include - -#include static atomic_t inotify_cookie; -static kmem_cache_t *watch_cachep __read_mostly; -static kmem_cache_t *event_cachep __read_mostly; - -static struct vfsmount *inotify_mnt __read_mostly; - -/* these are configurable via /proc/sys/fs/inotify/ */ -int inotify_max_user_instances __read_mostly; -int inotify_max_user_watches __read_mostly; -int inotify_max_queued_events __read_mostly; - /* * Lock ordering: * @@ -56,327 +41,108 @@ int inotify_max_queued_events __read_mostly; * iprune_mutex (synchronize shrink_icache_memory()) * inode_lock (protects the super_block->s_inodes list) * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) - * inotify_dev->mutex (protects inotify_device and watches->d_list) + * inotify_handle->mutex (protects inotify_handle and watches->h_list) + * + * The inode->inotify_mutex and inotify_handle->mutex and held during execution + * of a caller's event handler. Thus, the caller must not hold any locks + * taken in their event handler while calling any of the published inotify + * interfaces. */ /* - * Lifetimes of the three main data structures--inotify_device, inode, and + * Lifetimes of the three main data structures--inotify_handle, inode, and * inotify_watch--are managed by reference count. * - * inotify_device: Lifetime is from inotify_init() until release. Additional - * references can bump the count via get_inotify_dev() and drop the count via - * put_inotify_dev(). + * inotify_handle: Lifetime is from inotify_init() to inotify_destroy(). + * Additional references can bump the count via get_inotify_handle() and drop + * the count via put_inotify_handle(). * - * inotify_watch: Lifetime is from create_watch() to destory_watch(). - * Additional references can bump the count via get_inotify_watch() and drop - * the count via put_inotify_watch(). + * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch() + * to remove_watch_no_event(). Additional references can bump the count via + * get_inotify_watch() and drop the count via put_inotify_watch(). The caller + * is reponsible for the final put after receiving IN_IGNORED, or when using + * IN_ONESHOT after receiving the first event. Inotify does the final put if + * inotify_destroy() is called. * * inode: Pinned so long as the inode is associated with a watch, from - * create_watch() to put_inotify_watch(). + * inotify_add_watch() to the final put_inotify_watch(). */ /* - * struct inotify_device - represents an inotify instance + * struct inotify_handle - represents an inotify instance * * This structure is protected by the mutex 'mutex'. */ -struct inotify_device { - wait_queue_head_t wq; /* wait queue for i/o */ +struct inotify_handle { struct idr idr; /* idr mapping wd -> watch */ struct mutex mutex; /* protects this bad boy */ - struct list_head events; /* list of queued events */ struct list_head watches; /* list of watches */ atomic_t count; /* reference count */ - struct user_struct *user; /* user who opened this dev */ - unsigned int queue_size; /* size of the queue (bytes) */ - unsigned int event_count; /* number of pending events */ - unsigned int max_events; /* maximum number of events */ u32 last_wd; /* the last wd allocated */ + const struct inotify_operations *in_ops; /* inotify caller operations */ }; -/* - * struct inotify_kernel_event - An inotify event, originating from a watch and - * queued for user-space. A list of these is attached to each instance of the - * device. In read(), this list is walked and all events that can fit in the - * buffer are returned. - * - * Protected by dev->mutex of the device in which we are queued. - */ -struct inotify_kernel_event { - struct inotify_event event; /* the user-space event */ - struct list_head list; /* entry in inotify_device's list */ - char *name; /* filename, if any */ -}; - -/* - * struct inotify_watch - represents a watch request on a specific inode - * - * d_list is protected by dev->mutex of the associated watch->dev. - * i_list and mask are protected by inode->inotify_mutex of the associated inode. - * dev, inode, and wd are never written to once the watch is created. - */ -struct inotify_watch { - struct list_head d_list; /* entry in inotify_device's list */ - struct list_head i_list; /* entry in inode's list */ - atomic_t count; /* reference count */ - struct inotify_device *dev; /* associated device */ - struct inode *inode; /* associated inode */ - s32 wd; /* watch descriptor */ - u32 mask; /* event mask for this watch */ -}; - -#ifdef CONFIG_SYSCTL - -#include - -static int zero; - -ctl_table inotify_table[] = { - { - .ctl_name = INOTIFY_MAX_USER_INSTANCES, - .procname = "max_user_instances", - .data = &inotify_max_user_instances, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &zero, - }, - { - .ctl_name = INOTIFY_MAX_USER_WATCHES, - .procname = "max_user_watches", - .data = &inotify_max_user_watches, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &zero, - }, - { - .ctl_name = INOTIFY_MAX_QUEUED_EVENTS, - .procname = "max_queued_events", - .data = &inotify_max_queued_events, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &zero - }, - { .ctl_name = 0 } -}; -#endif /* CONFIG_SYSCTL */ - -static inline void get_inotify_dev(struct inotify_device *dev) +static inline void get_inotify_handle(struct inotify_handle *ih) { - atomic_inc(&dev->count); + atomic_inc(&ih->count); } -static inline void put_inotify_dev(struct inotify_device *dev) +static inline void put_inotify_handle(struct inotify_handle *ih) { - if (atomic_dec_and_test(&dev->count)) { - atomic_dec(&dev->user->inotify_devs); - free_uid(dev->user); - idr_destroy(&dev->idr); - kfree(dev); + if (atomic_dec_and_test(&ih->count)) { + idr_destroy(&ih->idr); + kfree(ih); } } -static inline void get_inotify_watch(struct inotify_watch *watch) +/** + * get_inotify_watch - grab a reference to an inotify_watch + * @watch: watch to grab + */ +void get_inotify_watch(struct inotify_watch *watch) { atomic_inc(&watch->count); } +EXPORT_SYMBOL_GPL(get_inotify_watch); -/* +/** * put_inotify_watch - decrements the ref count on a given watch. cleans up - * the watch and its references if the count reaches zero. + * watch references if the count reaches zero. inotify_watch is freed by + * inotify callers via the destroy_watch() op. + * @watch: watch to release */ -static inline void put_inotify_watch(struct inotify_watch *watch) +void put_inotify_watch(struct inotify_watch *watch) { if (atomic_dec_and_test(&watch->count)) { - put_inotify_dev(watch->dev); - iput(watch->inode); - kmem_cache_free(watch_cachep, watch); - } -} - -/* - * kernel_event - create a new kernel event with the given parameters - * - * This function can sleep. - */ -static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, - const char *name) -{ - struct inotify_kernel_event *kevent; - - kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); - if (unlikely(!kevent)) - return NULL; - - /* we hand this out to user-space, so zero it just in case */ - memset(&kevent->event, 0, sizeof(struct inotify_event)); - - kevent->event.wd = wd; - kevent->event.mask = mask; - kevent->event.cookie = cookie; - - INIT_LIST_HEAD(&kevent->list); - - if (name) { - size_t len, rem, event_size = sizeof(struct inotify_event); - - /* - * We need to pad the filename so as to properly align an - * array of inotify_event structures. Because the structure is - * small and the common case is a small filename, we just round - * up to the next multiple of the structure's sizeof. This is - * simple and safe for all architectures. - */ - len = strlen(name) + 1; - rem = event_size - len; - if (len > event_size) { - rem = event_size - (len % event_size); - if (len % event_size == 0) - rem = 0; - } - - kevent->name = kmalloc(len + rem, GFP_KERNEL); - if (unlikely(!kevent->name)) { - kmem_cache_free(event_cachep, kevent); - return NULL; - } - memcpy(kevent->name, name, len); - if (rem) - memset(kevent->name + len, 0, rem); - kevent->event.len = len + rem; - } else { - kevent->event.len = 0; - kevent->name = NULL; - } - - return kevent; -} - -/* - * inotify_dev_get_event - return the next event in the given dev's queue - * - * Caller must hold dev->mutex. - */ -static inline struct inotify_kernel_event * -inotify_dev_get_event(struct inotify_device *dev) -{ - return list_entry(dev->events.next, struct inotify_kernel_event, list); -} - -/* - * inotify_dev_queue_event - add a new event to the given device - * - * Caller must hold dev->mutex. Can sleep (calls kernel_event()). - */ -static void inotify_dev_queue_event(struct inotify_device *dev, - struct inotify_watch *watch, u32 mask, - u32 cookie, const char *name) -{ - struct inotify_kernel_event *kevent, *last; - - /* coalescing: drop this event if it is a dupe of the previous */ - last = inotify_dev_get_event(dev); - if (last && last->event.mask == mask && last->event.wd == watch->wd && - last->event.cookie == cookie) { - const char *lastname = last->name; - - if (!name && !lastname) - return; - if (name && lastname && !strcmp(lastname, name)) - return; - } - - /* the queue overflowed and we already sent the Q_OVERFLOW event */ - if (unlikely(dev->event_count > dev->max_events)) - return; - - /* if the queue overflows, we need to notify user space */ - if (unlikely(dev->event_count == dev->max_events)) - kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); - else - kevent = kernel_event(watch->wd, mask, cookie, name); - - if (unlikely(!kevent)) - return; - - /* queue the event and wake up anyone waiting */ - dev->event_count++; - dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; - list_add_tail(&kevent->list, &dev->events); - wake_up_interruptible(&dev->wq); -} - -/* - * remove_kevent - cleans up and ultimately frees the given kevent - * - * Caller must hold dev->mutex. - */ -static void remove_kevent(struct inotify_device *dev, - struct inotify_kernel_event *kevent) -{ - list_del(&kevent->list); + struct inotify_handle *ih = watch->ih; - dev->event_count--; - dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; - - kfree(kevent->name); - kmem_cache_free(event_cachep, kevent); -} - -/* - * inotify_dev_event_dequeue - destroy an event on the given device - * - * Caller must hold dev->mutex. - */ -static void inotify_dev_event_dequeue(struct inotify_device *dev) -{ - if (!list_empty(&dev->events)) { - struct inotify_kernel_event *kevent; - kevent = inotify_dev_get_event(dev); - remove_kevent(dev, kevent); + iput(watch->inode); + ih->in_ops->destroy_watch(watch); + put_inotify_handle(ih); } } +EXPORT_SYMBOL_GPL(put_inotify_watch); /* - * inotify_dev_get_wd - returns the next WD for use by the given dev + * inotify_handle_get_wd - returns the next WD for use by the given handle * - * Callers must hold dev->mutex. This function can sleep. + * Callers must hold ih->mutex. This function can sleep. */ -static int inotify_dev_get_wd(struct inotify_device *dev, - struct inotify_watch *watch) +static int inotify_handle_get_wd(struct inotify_handle *ih, + struct inotify_watch *watch) { int ret; do { - if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL))) + if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) return -ENOSPC; - ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd); + ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); } while (ret == -EAGAIN); - return ret; -} + if (likely(!ret)) + ih->last_wd = watch->wd; -/* - * find_inode - resolve a user-given path to a specific inode and return a nd - */ -static int find_inode(const char __user *dirname, struct nameidata *nd, - unsigned flags) -{ - int error; - - error = __user_walk(dirname, flags, nd); - if (error) - return error; - /* you can only watch an inode if you have read permissions on it */ - error = vfs_permission(nd, MAY_READ); - if (error) - path_release(nd); - return error; + return ret; } /* @@ -422,67 +188,18 @@ static void set_dentry_child_flags(struct inode *inode, int watched) } /* - * create_watch - creates a watch on the given device. - * - * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. - * Both 'dev' and 'inode' (by way of nameidata) need to be pinned. - */ -static struct inotify_watch *create_watch(struct inotify_device *dev, - u32 mask, struct inode *inode) -{ - struct inotify_watch *watch; - int ret; - - if (atomic_read(&dev->user->inotify_watches) >= - inotify_max_user_watches) - return ERR_PTR(-ENOSPC); - - watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); - if (unlikely(!watch)) - return ERR_PTR(-ENOMEM); - - ret = inotify_dev_get_wd(dev, watch); - if (unlikely(ret)) { - kmem_cache_free(watch_cachep, watch); - return ERR_PTR(ret); - } - - dev->last_wd = watch->wd; - watch->mask = mask; - atomic_set(&watch->count, 0); - INIT_LIST_HEAD(&watch->d_list); - INIT_LIST_HEAD(&watch->i_list); - - /* save a reference to device and bump the count to make it official */ - get_inotify_dev(dev); - watch->dev = dev; - - /* - * Save a reference to the inode and bump the ref count to make it - * official. We hold a reference to nameidata, which makes this safe. - */ - watch->inode = igrab(inode); - - /* bump our own count, corresponding to our entry in dev->watches */ - get_inotify_watch(watch); - - atomic_inc(&dev->user->inotify_watches); - - return watch; -} - -/* - * inotify_find_dev - find the watch associated with the given inode and dev + * inotify_find_handle - find the watch associated with the given inode and + * handle * * Callers must hold inode->inotify_mutex. */ -static struct inotify_watch *inode_find_dev(struct inode *inode, - struct inotify_device *dev) +static struct inotify_watch *inode_find_handle(struct inode *inode, + struct inotify_handle *ih) { struct inotify_watch *watch; list_for_each_entry(watch, &inode->inotify_watches, i_list) { - if (watch->dev == dev) + if (watch->ih == ih) return watch; } @@ -491,39 +208,34 @@ static struct inotify_watch *inode_find_dev(struct inode *inode, /* * remove_watch_no_event - remove_watch() without the IN_IGNORED event. + * + * Callers must hold both inode->inotify_mutex and ih->mutex. */ static void remove_watch_no_event(struct inotify_watch *watch, - struct inotify_device *dev) + struct inotify_handle *ih) { list_del(&watch->i_list); - list_del(&watch->d_list); + list_del(&watch->h_list); if (!inotify_inode_watched(watch->inode)) set_dentry_child_flags(watch->inode, 0); - atomic_dec(&dev->user->inotify_watches); - idr_remove(&dev->idr, watch->wd); - put_inotify_watch(watch); + idr_remove(&ih->idr, watch->wd); } /* - * remove_watch - Remove a watch from both the device and the inode. Sends - * the IN_IGNORED event to the given device signifying that the inode is no - * longer watched. - * - * Callers must hold both inode->inotify_mutex and dev->mutex. We drop a - * reference to the inode before returning. + * remove_watch - Remove a watch from both the handle and the inode. Sends + * the IN_IGNORED event signifying that the inode is no longer watched. * - * The inode is not iput() so as to remain atomic. If the inode needs to be - * iput(), the call returns one. Otherwise, it returns zero. + * Callers must hold both inode->inotify_mutex and ih->mutex. */ -static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) +static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih) { - inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL); - remove_watch_no_event(watch, dev); + remove_watch_no_event(watch, ih); + ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL); } -/* Kernel API */ +/* Kernel API for producing events */ /* * inotify_d_instantiate - instantiate dcache entry for inode @@ -576,14 +288,12 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { u32 watch_mask = watch->mask; if (watch_mask & mask) { - struct inotify_device *dev = watch->dev; - get_inotify_watch(watch); - mutex_lock(&dev->mutex); - inotify_dev_queue_event(dev, watch, mask, cookie, name); + struct inotify_handle *ih= watch->ih; + mutex_lock(&ih->mutex); if (watch_mask & IN_ONESHOT) - remove_watch_no_event(watch, dev); - mutex_unlock(&dev->mutex); - put_inotify_watch(watch); + remove_watch_no_event(watch, ih); + ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name); + mutex_unlock(&ih->mutex); } } mutex_unlock(&inode->inotify_mutex); @@ -694,11 +404,12 @@ void inotify_unmount_inodes(struct list_head *list) mutex_lock(&inode->inotify_mutex); watches = &inode->inotify_watches; list_for_each_entry_safe(watch, next_w, watches, i_list) { - struct inotify_device *dev = watch->dev; - mutex_lock(&dev->mutex); - inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL); - remove_watch(watch, dev); - mutex_unlock(&dev->mutex); + struct inotify_handle *ih= watch->ih; + mutex_lock(&ih->mutex); + ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, + NULL); + remove_watch(watch, ih); + mutex_unlock(&ih->mutex); } mutex_unlock(&inode->inotify_mutex); iput(inode); @@ -718,432 +429,240 @@ void inotify_inode_is_dead(struct inode *inode) mutex_lock(&inode->inotify_mutex); list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { - struct inotify_device *dev = watch->dev; - mutex_lock(&dev->mutex); - remove_watch(watch, dev); - mutex_unlock(&dev->mutex); + struct inotify_handle *ih = watch->ih; + mutex_lock(&ih->mutex); + remove_watch(watch, ih); + mutex_unlock(&ih->mutex); } mutex_unlock(&inode->inotify_mutex); } EXPORT_SYMBOL_GPL(inotify_inode_is_dead); -/* Device Interface */ - -static unsigned int inotify_poll(struct file *file, poll_table *wait) -{ - struct inotify_device *dev = file->private_data; - int ret = 0; - - poll_wait(file, &dev->wq, wait); - mutex_lock(&dev->mutex); - if (!list_empty(&dev->events)) - ret = POLLIN | POLLRDNORM; - mutex_unlock(&dev->mutex); - - return ret; -} +/* Kernel Consumer API */ -static ssize_t inotify_read(struct file *file, char __user *buf, - size_t count, loff_t *pos) +/** + * inotify_init - allocate and initialize an inotify instance + * @ops: caller's inotify operations + */ +struct inotify_handle *inotify_init(const struct inotify_operations *ops) { - size_t event_size = sizeof (struct inotify_event); - struct inotify_device *dev; - char __user *start; - int ret; - DEFINE_WAIT(wait); - - start = buf; - dev = file->private_data; - - while (1) { - int events; - - prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); + struct inotify_handle *ih; - mutex_lock(&dev->mutex); - events = !list_empty(&dev->events); - mutex_unlock(&dev->mutex); - if (events) { - ret = 0; - break; - } - - if (file->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } - - if (signal_pending(current)) { - ret = -EINTR; - break; - } - - schedule(); - } - - finish_wait(&dev->wq, &wait); - if (ret) - return ret; - - mutex_lock(&dev->mutex); - while (1) { - struct inotify_kernel_event *kevent; - - ret = buf - start; - if (list_empty(&dev->events)) - break; - - kevent = inotify_dev_get_event(dev); - if (event_size + kevent->event.len > count) - break; - - if (copy_to_user(buf, &kevent->event, event_size)) { - ret = -EFAULT; - break; - } - buf += event_size; - count -= event_size; - - if (kevent->name) { - if (copy_to_user(buf, kevent->name, kevent->event.len)){ - ret = -EFAULT; - break; - } - buf += kevent->event.len; - count -= kevent->event.len; - } + ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL); + if (unlikely(!ih)) + return ERR_PTR(-ENOMEM); - remove_kevent(dev, kevent); - } - mutex_unlock(&dev->mutex); + idr_init(&ih->idr); + INIT_LIST_HEAD(&ih->watches); + mutex_init(&ih->mutex); + ih->last_wd = 0; + ih->in_ops = ops; + atomic_set(&ih->count, 0); + get_inotify_handle(ih); - return ret; + return ih; } +EXPORT_SYMBOL_GPL(inotify_init); -static int inotify_release(struct inode *ignored, struct file *file) +/** + * inotify_destroy - clean up and destroy an inotify instance + * @ih: inotify handle + */ +void inotify_destroy(struct inotify_handle *ih) { - struct inotify_device *dev = file->private_data; - /* - * Destroy all of the watches on this device. Unfortunately, not very + * Destroy all of the watches for this handle. Unfortunately, not very * pretty. We cannot do a simple iteration over the list, because we * do not know the inode until we iterate to the watch. But we need to - * hold inode->inotify_mutex before dev->mutex. The following works. + * hold inode->inotify_mutex before ih->mutex. The following works. */ while (1) { struct inotify_watch *watch; struct list_head *watches; struct inode *inode; - mutex_lock(&dev->mutex); - watches = &dev->watches; + mutex_lock(&ih->mutex); + watches = &ih->watches; if (list_empty(watches)) { - mutex_unlock(&dev->mutex); + mutex_unlock(&ih->mutex); break; } - watch = list_entry(watches->next, struct inotify_watch, d_list); + watch = list_entry(watches->next, struct inotify_watch, h_list); get_inotify_watch(watch); - mutex_unlock(&dev->mutex); + mutex_unlock(&ih->mutex); inode = watch->inode; mutex_lock(&inode->inotify_mutex); - mutex_lock(&dev->mutex); + mutex_lock(&ih->mutex); /* make sure we didn't race with another list removal */ - if (likely(idr_find(&dev->idr, watch->wd))) - remove_watch_no_event(watch, dev); + if (likely(idr_find(&ih->idr, watch->wd))) { + remove_watch_no_event(watch, ih); + put_inotify_watch(watch); + } - mutex_unlock(&dev->mutex); + mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); put_inotify_watch(watch); } - /* destroy all of the events on this device */ - mutex_lock(&dev->mutex); - while (!list_empty(&dev->events)) - inotify_dev_event_dequeue(dev); - mutex_unlock(&dev->mutex); - - /* free this device: the put matching the get in inotify_init() */ - put_inotify_dev(dev); - - return 0; + /* free this handle: the put matching the get in inotify_init() */ + put_inotify_handle(ih); } +EXPORT_SYMBOL_GPL(inotify_destroy); -/* - * inotify_ignore - remove a given wd from this inotify instance. +/** + * inotify_find_update_watch - find and update the mask of an existing watch + * @ih: inotify handle + * @inode: inode's watch to update + * @mask: mask of events to watch * - * Can sleep. + * Caller must pin given inode (via nameidata). */ -static int inotify_ignore(struct inotify_device *dev, s32 wd) +s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, + u32 mask) { - struct inotify_watch *watch; - struct inode *inode; + struct inotify_watch *old; + int mask_add = 0; + int ret; - mutex_lock(&dev->mutex); - watch = idr_find(&dev->idr, wd); - if (unlikely(!watch)) { - mutex_unlock(&dev->mutex); + if (mask & IN_MASK_ADD) + mask_add = 1; + + /* don't allow invalid bits: we don't want flags set */ + mask &= IN_ALL_EVENTS | IN_ONESHOT; + if (unlikely(!mask)) return -EINVAL; - } - get_inotify_watch(watch); - inode = watch->inode; - mutex_unlock(&dev->mutex); mutex_lock(&inode->inotify_mutex); - mutex_lock(&dev->mutex); - - /* make sure that we did not race */ - if (likely(idr_find(&dev->idr, wd) == watch)) - remove_watch(watch, dev); - - mutex_unlock(&dev->mutex); - mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(watch); - - return 0; -} - -static long inotify_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct inotify_device *dev; - void __user *p; - int ret = -ENOTTY; + mutex_lock(&ih->mutex); - dev = file->private_data; - p = (void __user *) arg; - - switch (cmd) { - case FIONREAD: - ret = put_user(dev->queue_size, (int __user *) p); - break; - } - - return ret; -} - -static const struct file_operations inotify_fops = { - .poll = inotify_poll, - .read = inotify_read, - .release = inotify_release, - .unlocked_ioctl = inotify_ioctl, - .compat_ioctl = inotify_ioctl, -}; - -asmlinkage long sys_inotify_init(void) -{ - struct inotify_device *dev; - struct user_struct *user; - struct file *filp; - int fd, ret; - - fd = get_unused_fd(); - if (fd < 0) - return fd; - - filp = get_empty_filp(); - if (!filp) { - ret = -ENFILE; - goto out_put_fd; - } - - user = get_uid(current->user); - if (unlikely(atomic_read(&user->inotify_devs) >= - inotify_max_user_instances)) { - ret = -EMFILE; - goto out_free_uid; - } - - dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); - if (unlikely(!dev)) { - ret = -ENOMEM; - goto out_free_uid; + /* + * Handle the case of re-adding a watch on an (inode,ih) pair that we + * are already watching. We just update the mask and return its wd. + */ + old = inode_find_handle(inode, ih); + if (unlikely(!old)) { + ret = -ENOENT; + goto out; } - filp->f_op = &inotify_fops; - filp->f_vfsmnt = mntget(inotify_mnt); - filp->f_dentry = dget(inotify_mnt->mnt_root); - filp->f_mapping = filp->f_dentry->d_inode->i_mapping; - filp->f_mode = FMODE_READ; - filp->f_flags = O_RDONLY; - filp->private_data = dev; - - idr_init(&dev->idr); - INIT_LIST_HEAD(&dev->events); - INIT_LIST_HEAD(&dev->watches); - init_waitqueue_head(&dev->wq); - mutex_init(&dev->mutex); - dev->event_count = 0; - dev->queue_size = 0; - dev->max_events = inotify_max_queued_events; - dev->user = user; - dev->last_wd = 0; - atomic_set(&dev->count, 0); - - get_inotify_dev(dev); - atomic_inc(&user->inotify_devs); - fd_install(fd, filp); - - return fd; -out_free_uid: - free_uid(user); - put_filp(filp); -out_put_fd: - put_unused_fd(fd); + if (mask_add) + old->mask |= mask; + else + old->mask = mask; + ret = old->wd; +out: + mutex_unlock(&ih->mutex); + mutex_unlock(&inode->inotify_mutex); return ret; } +EXPORT_SYMBOL_GPL(inotify_find_update_watch); -asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) +/** + * inotify_add_watch - add a watch to an inotify instance + * @ih: inotify handle + * @watch: caller allocated watch structure + * @inode: inode to watch + * @mask: mask of events to watch + * + * Caller must pin given inode (via nameidata). + * Caller must ensure it only calls inotify_add_watch() once per watch. + * Calls inotify_handle_get_wd() so may sleep. + */ +s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, + struct inode *inode, u32 mask) { - struct inotify_watch *watch, *old; - struct inode *inode; - struct inotify_device *dev; - struct nameidata nd; - struct file *filp; - int ret, fput_needed; - int mask_add = 0; - unsigned flags = 0; - - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) - return -EBADF; + int ret = 0; - /* verify that this is indeed an inotify instance */ - if (unlikely(filp->f_op != &inotify_fops)) { - ret = -EINVAL; - goto fput_and_out; - } + /* don't allow invalid bits: we don't want flags set */ + mask &= IN_ALL_EVENTS | IN_ONESHOT; + if (unlikely(!mask)) + return -EINVAL; + watch->mask = mask; - if (!(mask & IN_DONT_FOLLOW)) - flags |= LOOKUP_FOLLOW; - if (mask & IN_ONLYDIR) - flags |= LOOKUP_DIRECTORY; + mutex_lock(&inode->inotify_mutex); + mutex_lock(&ih->mutex); - ret = find_inode(path, &nd, flags); + /* Initialize a new watch */ + ret = inotify_handle_get_wd(ih, watch); if (unlikely(ret)) - goto fput_and_out; - - /* inode held in place by reference to nd; dev by fget on fd */ - inode = nd.dentry->d_inode; - dev = filp->private_data; - - mutex_lock(&inode->inotify_mutex); - mutex_lock(&dev->mutex); + goto out; + ret = watch->wd; - if (mask & IN_MASK_ADD) - mask_add = 1; + atomic_set(&watch->count, 0); + INIT_LIST_HEAD(&watch->h_list); + INIT_LIST_HEAD(&watch->i_list); - /* don't let user-space set invalid bits: we don't want flags set */ - mask &= IN_ALL_EVENTS | IN_ONESHOT; - if (unlikely(!mask)) { - ret = -EINVAL; - goto out; - } + /* save a reference to handle and bump the count to make it official */ + get_inotify_handle(ih); + watch->ih = ih; /* - * Handle the case of re-adding a watch on an (inode,dev) pair that we - * are already watching. We just update the mask and return its wd. + * Save a reference to the inode and bump the ref count to make it + * official. We hold a reference to nameidata, which makes this safe. */ - old = inode_find_dev(inode, dev); - if (unlikely(old)) { - if (mask_add) - old->mask |= mask; - else - old->mask = mask; - ret = old->wd; - goto out; - } + watch->inode = igrab(inode); - watch = create_watch(dev, mask, inode); - if (unlikely(IS_ERR(watch))) { - ret = PTR_ERR(watch); - goto out; - } + get_inotify_watch(watch); /* initial get */ if (!inotify_inode_watched(inode)) set_dentry_child_flags(inode, 1); - /* Add the watch to the device's and the inode's list */ - list_add(&watch->d_list, &dev->watches); + /* Add the watch to the handle's and the inode's list */ + list_add(&watch->h_list, &ih->watches); list_add(&watch->i_list, &inode->inotify_watches); - ret = watch->wd; out: - mutex_unlock(&dev->mutex); + mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); - path_release(&nd); -fput_and_out: - fput_light(filp, fput_needed); return ret; } +EXPORT_SYMBOL_GPL(inotify_add_watch); -asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) +/** + * inotify_rm_wd - remove a watch from an inotify instance + * @ih: inotify handle + * @wd: watch descriptor to remove + * + * Can sleep. + */ +int inotify_rm_wd(struct inotify_handle *ih, u32 wd) { - struct file *filp; - struct inotify_device *dev; - int ret, fput_needed; - - filp = fget_light(fd, &fput_needed); - if (unlikely(!filp)) - return -EBADF; + struct inotify_watch *watch; + struct inode *inode; - /* verify that this is indeed an inotify instance */ - if (unlikely(filp->f_op != &inotify_fops)) { - ret = -EINVAL; - goto out; + mutex_lock(&ih->mutex); + watch = idr_find(&ih->idr, wd); + if (unlikely(!watch)) { + mutex_unlock(&ih->mutex); + return -EINVAL; } + get_inotify_watch(watch); + inode = watch->inode; + mutex_unlock(&ih->mutex); - dev = filp->private_data; - ret = inotify_ignore(dev, wd); + mutex_lock(&inode->inotify_mutex); + mutex_lock(&ih->mutex); -out: - fput_light(filp, fput_needed); - return ret; -} + /* make sure that we did not race */ + if (likely(idr_find(&ih->idr, wd) == watch)) + remove_watch(watch, ih); -static struct super_block * -inotify_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); -} + mutex_unlock(&ih->mutex); + mutex_unlock(&inode->inotify_mutex); + put_inotify_watch(watch); -static struct file_system_type inotify_fs_type = { - .name = "inotifyfs", - .get_sb = inotify_get_sb, - .kill_sb = kill_anon_super, -}; + return 0; +} +EXPORT_SYMBOL_GPL(inotify_rm_wd); /* - * inotify_setup - Our initialization function. Note that we cannnot return - * error because we have compiled-in VFS hooks. So an (unlikely) failure here - * must result in panic(). + * inotify_setup - core initialization function */ static int __init inotify_setup(void) { - int ret; - - ret = register_filesystem(&inotify_fs_type); - if (unlikely(ret)) - panic("inotify: register_filesystem returned %d!\n", ret); - - inotify_mnt = kern_mount(&inotify_fs_type); - if (IS_ERR(inotify_mnt)) - panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); - - inotify_max_queued_events = 16384; - inotify_max_user_instances = 128; - inotify_max_user_watches = 8192; - atomic_set(&inotify_cookie, 0); - watch_cachep = kmem_cache_create("inotify_watch_cache", - sizeof(struct inotify_watch), - 0, SLAB_PANIC, NULL, NULL); - event_cachep = kmem_cache_create("inotify_event_cache", - sizeof(struct inotify_kernel_event), - 0, SLAB_PANIC, NULL, NULL); - return 0; } diff --git a/fs/inotify_user.c b/fs/inotify_user.c new file mode 100644 index 000000000000..845dc79a4e9c --- /dev/null +++ b/fs/inotify_user.c @@ -0,0 +1,717 @@ +/* + * fs/inotify_user.c - inotify support for userspace + * + * Authors: + * John McCutchan + * Robert Love + * + * Copyright (C) 2005 John McCutchan + * Copyright 2006 Hewlett-Packard Development Company, L.P. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static kmem_cache_t *watch_cachep __read_mostly; +static kmem_cache_t *event_cachep __read_mostly; + +static struct vfsmount *inotify_mnt __read_mostly; + +/* these are configurable via /proc/sys/fs/inotify/ */ +int inotify_max_user_instances __read_mostly; +int inotify_max_user_watches __read_mostly; +int inotify_max_queued_events __read_mostly; + +/* + * Lock ordering: + * + * inotify_dev->up_mutex (ensures we don't re-add the same watch) + * inode->inotify_mutex (protects inode's watch list) + * inotify_handle->mutex (protects inotify_handle's watch list) + * inotify_dev->ev_mutex (protects device's event queue) + */ + +/* + * Lifetimes of the main data structures: + * + * inotify_device: Lifetime is managed by reference count, from + * sys_inotify_init() until release. Additional references can bump the count + * via get_inotify_dev() and drop the count via put_inotify_dev(). + * + * inotify_user_watch: Lifetime is from create_watch() to the receipt of an + * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the + * first event, or to inotify_destroy(). + */ + +/* + * struct inotify_device - represents an inotify instance + * + * This structure is protected by the mutex 'mutex'. + */ +struct inotify_device { + wait_queue_head_t wq; /* wait queue for i/o */ + struct mutex ev_mutex; /* protects event queue */ + struct mutex up_mutex; /* synchronizes watch updates */ + struct list_head events; /* list of queued events */ + atomic_t count; /* reference count */ + struct user_struct *user; /* user who opened this dev */ + struct inotify_handle *ih; /* inotify handle */ + unsigned int queue_size; /* size of the queue (bytes) */ + unsigned int event_count; /* number of pending events */ + unsigned int max_events; /* maximum number of events */ +}; + +/* + * struct inotify_kernel_event - An inotify event, originating from a watch and + * queued for user-space. A list of these is attached to each instance of the + * device. In read(), this list is walked and all events that can fit in the + * buffer are returned. + * + * Protected by dev->ev_mutex of the device in which we are queued. + */ +struct inotify_kernel_event { + struct inotify_event event; /* the user-space event */ + struct list_head list; /* entry in inotify_device's list */ + char *name; /* filename, if any */ +}; + +/* + * struct inotify_user_watch - our version of an inotify_watch, we add + * a reference to the associated inotify_device. + */ +struct inotify_user_watch { + struct inotify_device *dev; /* associated device */ + struct inotify_watch wdata; /* inotify watch data */ +}; + +#ifdef CONFIG_SYSCTL + +#include + +static int zero; + +ctl_table inotify_table[] = { + { + .ctl_name = INOTIFY_MAX_USER_INSTANCES, + .procname = "max_user_instances", + .data = &inotify_max_user_instances, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = INOTIFY_MAX_USER_WATCHES, + .procname = "max_user_watches", + .data = &inotify_max_user_watches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + }, + { + .ctl_name = INOTIFY_MAX_QUEUED_EVENTS, + .procname = "max_queued_events", + .data = &inotify_max_queued_events, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero + }, + { .ctl_name = 0 } +}; +#endif /* CONFIG_SYSCTL */ + +static inline void get_inotify_dev(struct inotify_device *dev) +{ + atomic_inc(&dev->count); +} + +static inline void put_inotify_dev(struct inotify_device *dev) +{ + if (atomic_dec_and_test(&dev->count)) { + atomic_dec(&dev->user->inotify_devs); + free_uid(dev->user); + kfree(dev); + } +} + +/* + * free_inotify_user_watch - cleans up the watch and its references + */ +static void free_inotify_user_watch(struct inotify_watch *w) +{ + struct inotify_user_watch *watch; + struct inotify_device *dev; + + watch = container_of(w, struct inotify_user_watch, wdata); + dev = watch->dev; + + atomic_dec(&dev->user->inotify_watches); + put_inotify_dev(dev); + kmem_cache_free(watch_cachep, watch); +} + +/* + * kernel_event - create a new kernel event with the given parameters + * + * This function can sleep. + */ +static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie, + const char *name) +{ + struct inotify_kernel_event *kevent; + + kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL); + if (unlikely(!kevent)) + return NULL; + + /* we hand this out to user-space, so zero it just in case */ + memset(&kevent->event, 0, sizeof(struct inotify_event)); + + kevent->event.wd = wd; + kevent->event.mask = mask; + kevent->event.cookie = cookie; + + INIT_LIST_HEAD(&kevent->list); + + if (name) { + size_t len, rem, event_size = sizeof(struct inotify_event); + + /* + * We need to pad the filename so as to properly align an + * array of inotify_event structures. Because the structure is + * small and the common case is a small filename, we just round + * up to the next multiple of the structure's sizeof. This is + * simple and safe for all architectures. + */ + len = strlen(name) + 1; + rem = event_size - len; + if (len > event_size) { + rem = event_size - (len % event_size); + if (len % event_size == 0) + rem = 0; + } + + kevent->name = kmalloc(len + rem, GFP_KERNEL); + if (unlikely(!kevent->name)) { + kmem_cache_free(event_cachep, kevent); + return NULL; + } + memcpy(kevent->name, name, len); + if (rem) + memset(kevent->name + len, 0, rem); + kevent->event.len = len + rem; + } else { + kevent->event.len = 0; + kevent->name = NULL; + } + + return kevent; +} + +/* + * inotify_dev_get_event - return the next event in the given dev's queue + * + * Caller must hold dev->ev_mutex. + */ +static inline struct inotify_kernel_event * +inotify_dev_get_event(struct inotify_device *dev) +{ + return list_entry(dev->events.next, struct inotify_kernel_event, list); +} + +/* + * inotify_dev_queue_event - event handler registered with core inotify, adds + * a new event to the given device + * + * Can sleep (calls kernel_event()). + */ +static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, + u32 cookie, const char *name) +{ + struct inotify_user_watch *watch; + struct inotify_device *dev; + struct inotify_kernel_event *kevent, *last; + + watch = container_of(w, struct inotify_user_watch, wdata); + dev = watch->dev; + + mutex_lock(&dev->ev_mutex); + + /* we can safely put the watch as we don't reference it while + * generating the event + */ + if (mask & IN_IGNORED || mask & IN_ONESHOT) + put_inotify_watch(w); /* final put */ + + /* coalescing: drop this event if it is a dupe of the previous */ + last = inotify_dev_get_event(dev); + if (last && last->event.mask == mask && last->event.wd == wd && + last->event.cookie == cookie) { + const char *lastname = last->name; + + if (!name && !lastname) + goto out; + if (name && lastname && !strcmp(lastname, name)) + goto out; + } + + /* the queue overflowed and we already sent the Q_OVERFLOW event */ + if (unlikely(dev->event_count > dev->max_events)) + goto out; + + /* if the queue overflows, we need to notify user space */ + if (unlikely(dev->event_count == dev->max_events)) + kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL); + else + kevent = kernel_event(wd, mask, cookie, name); + + if (unlikely(!kevent)) + goto out; + + /* queue the event and wake up anyone waiting */ + dev->event_count++; + dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; + list_add_tail(&kevent->list, &dev->events); + wake_up_interruptible(&dev->wq); + +out: + mutex_unlock(&dev->ev_mutex); +} + +/* + * remove_kevent - cleans up and ultimately frees the given kevent + * + * Caller must hold dev->ev_mutex. + */ +static void remove_kevent(struct inotify_device *dev, + struct inotify_kernel_event *kevent) +{ + list_del(&kevent->list); + + dev->event_count--; + dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; + + kfree(kevent->name); + kmem_cache_free(event_cachep, kevent); +} + +/* + * inotify_dev_event_dequeue - destroy an event on the given device + * + * Caller must hold dev->ev_mutex. + */ +static void inotify_dev_event_dequeue(struct inotify_device *dev) +{ + if (!list_empty(&dev->events)) { + struct inotify_kernel_event *kevent; + kevent = inotify_dev_get_event(dev); + remove_kevent(dev, kevent); + } +} + +/* + * find_inode - resolve a user-given path to a specific inode and return a nd + */ +static int find_inode(const char __user *dirname, struct nameidata *nd, + unsigned flags) +{ + int error; + + error = __user_walk(dirname, flags, nd); + if (error) + return error; + /* you can only watch an inode if you have read permissions on it */ + error = vfs_permission(nd, MAY_READ); + if (error) + path_release(nd); + return error; +} + +/* + * create_watch - creates a watch on the given device. + * + * Callers must hold dev->up_mutex. + */ +static int create_watch(struct inotify_device *dev, struct inode *inode, + u32 mask) +{ + struct inotify_user_watch *watch; + int ret; + + if (atomic_read(&dev->user->inotify_watches) >= + inotify_max_user_watches) + return -ENOSPC; + + watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL); + if (unlikely(!watch)) + return -ENOMEM; + + /* save a reference to device and bump the count to make it official */ + get_inotify_dev(dev); + watch->dev = dev; + + atomic_inc(&dev->user->inotify_watches); + + ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask); + if (ret < 0) + free_inotify_user_watch(&watch->wdata); + + return ret; +} + +/* Device Interface */ + +static unsigned int inotify_poll(struct file *file, poll_table *wait) +{ + struct inotify_device *dev = file->private_data; + int ret = 0; + + poll_wait(file, &dev->wq, wait); + mutex_lock(&dev->ev_mutex); + if (!list_empty(&dev->events)) + ret = POLLIN | POLLRDNORM; + mutex_unlock(&dev->ev_mutex); + + return ret; +} + +static ssize_t inotify_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + size_t event_size = sizeof (struct inotify_event); + struct inotify_device *dev; + char __user *start; + int ret; + DEFINE_WAIT(wait); + + start = buf; + dev = file->private_data; + + while (1) { + int events; + + prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); + + mutex_lock(&dev->ev_mutex); + events = !list_empty(&dev->events); + mutex_unlock(&dev->ev_mutex); + if (events) { + ret = 0; + break; + } + + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + schedule(); + } + + finish_wait(&dev->wq, &wait); + if (ret) + return ret; + + mutex_lock(&dev->ev_mutex); + while (1) { + struct inotify_kernel_event *kevent; + + ret = buf - start; + if (list_empty(&dev->events)) + break; + + kevent = inotify_dev_get_event(dev); + if (event_size + kevent->event.len > count) + break; + + if (copy_to_user(buf, &kevent->event, event_size)) { + ret = -EFAULT; + break; + } + buf += event_size; + count -= event_size; + + if (kevent->name) { + if (copy_to_user(buf, kevent->name, kevent->event.len)){ + ret = -EFAULT; + break; + } + buf += kevent->event.len; + count -= kevent->event.len; + } + + remove_kevent(dev, kevent); + } + mutex_unlock(&dev->ev_mutex); + + return ret; +} + +static int inotify_release(struct inode *ignored, struct file *file) +{ + struct inotify_device *dev = file->private_data; + + inotify_destroy(dev->ih); + + /* destroy all of the events on this device */ + mutex_lock(&dev->ev_mutex); + while (!list_empty(&dev->events)) + inotify_dev_event_dequeue(dev); + mutex_unlock(&dev->ev_mutex); + + /* free this device: the put matching the get in inotify_init() */ + put_inotify_dev(dev); + + return 0; +} + +static long inotify_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inotify_device *dev; + void __user *p; + int ret = -ENOTTY; + + dev = file->private_data; + p = (void __user *) arg; + + switch (cmd) { + case FIONREAD: + ret = put_user(dev->queue_size, (int __user *) p); + break; + } + + return ret; +} + +static const struct file_operations inotify_fops = { + .poll = inotify_poll, + .read = inotify_read, + .release = inotify_release, + .unlocked_ioctl = inotify_ioctl, + .compat_ioctl = inotify_ioctl, +}; + +static const struct inotify_operations inotify_user_ops = { + .handle_event = inotify_dev_queue_event, + .destroy_watch = free_inotify_user_watch, +}; + +asmlinkage long sys_inotify_init(void) +{ + struct inotify_device *dev; + struct inotify_handle *ih; + struct user_struct *user; + struct file *filp; + int fd, ret; + + fd = get_unused_fd(); + if (fd < 0) + return fd; + + filp = get_empty_filp(); + if (!filp) { + ret = -ENFILE; + goto out_put_fd; + } + + user = get_uid(current->user); + if (unlikely(atomic_read(&user->inotify_devs) >= + inotify_max_user_instances)) { + ret = -EMFILE; + goto out_free_uid; + } + + dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); + if (unlikely(!dev)) { + ret = -ENOMEM; + goto out_free_uid; + } + + ih = inotify_init(&inotify_user_ops); + if (unlikely(IS_ERR(ih))) { + ret = PTR_ERR(ih); + goto out_free_dev; + } + dev->ih = ih; + + filp->f_op = &inotify_fops; + filp->f_vfsmnt = mntget(inotify_mnt); + filp->f_dentry = dget(inotify_mnt->mnt_root); + filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_mode = FMODE_READ; + filp->f_flags = O_RDONLY; + filp->private_data = dev; + + INIT_LIST_HEAD(&dev->events); + init_waitqueue_head(&dev->wq); + mutex_init(&dev->ev_mutex); + mutex_init(&dev->up_mutex); + dev->event_count = 0; + dev->queue_size = 0; + dev->max_events = inotify_max_queued_events; + dev->user = user; + atomic_set(&dev->count, 0); + + get_inotify_dev(dev); + atomic_inc(&user->inotify_devs); + fd_install(fd, filp); + + return fd; +out_free_dev: + kfree(dev); +out_free_uid: + free_uid(user); + put_filp(filp); +out_put_fd: + put_unused_fd(fd); + return ret; +} + +asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) +{ + struct inode *inode; + struct inotify_device *dev; + struct nameidata nd; + struct file *filp; + int ret, fput_needed; + unsigned flags = 0; + + filp = fget_light(fd, &fput_needed); + if (unlikely(!filp)) + return -EBADF; + + /* verify that this is indeed an inotify instance */ + if (unlikely(filp->f_op != &inotify_fops)) { + ret = -EINVAL; + goto fput_and_out; + } + + if (!(mask & IN_DONT_FOLLOW)) + flags |= LOOKUP_FOLLOW; + if (mask & IN_ONLYDIR) + flags |= LOOKUP_DIRECTORY; + + ret = find_inode(path, &nd, flags); + if (unlikely(ret)) + goto fput_and_out; + + /* inode held in place by reference to nd; dev by fget on fd */ + inode = nd.dentry->d_inode; + dev = filp->private_data; + + mutex_lock(&dev->up_mutex); + ret = inotify_find_update_watch(dev->ih, inode, mask); + if (ret == -ENOENT) + ret = create_watch(dev, inode, mask); + mutex_unlock(&dev->up_mutex); + + path_release(&nd); +fput_and_out: + fput_light(filp, fput_needed); + return ret; +} + +asmlinkage long sys_inotify_rm_watch(int fd, u32 wd) +{ + struct file *filp; + struct inotify_device *dev; + int ret, fput_needed; + + filp = fget_light(fd, &fput_needed); + if (unlikely(!filp)) + return -EBADF; + + /* verify that this is indeed an inotify instance */ + if (unlikely(filp->f_op != &inotify_fops)) { + ret = -EINVAL; + goto out; + } + + dev = filp->private_data; + + /* we free our watch data when we get IN_IGNORED */ + ret = inotify_rm_wd(dev->ih, wd); + +out: + fput_light(filp, fput_needed); + return ret; +} + +static struct super_block * +inotify_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); +} + +static struct file_system_type inotify_fs_type = { + .name = "inotifyfs", + .get_sb = inotify_get_sb, + .kill_sb = kill_anon_super, +}; + +/* + * inotify_user_setup - Our initialization function. Note that we cannnot return + * error because we have compiled-in VFS hooks. So an (unlikely) failure here + * must result in panic(). + */ +static int __init inotify_user_setup(void) +{ + int ret; + + ret = register_filesystem(&inotify_fs_type); + if (unlikely(ret)) + panic("inotify: register_filesystem returned %d!\n", ret); + + inotify_mnt = kern_mount(&inotify_fs_type); + if (IS_ERR(inotify_mnt)) + panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); + + inotify_max_queued_events = 16384; + inotify_max_user_instances = 128; + inotify_max_user_watches = 8192; + + watch_cachep = kmem_cache_create("inotify_watch_cache", + sizeof(struct inotify_user_watch), + 0, SLAB_PANIC, NULL, NULL); + event_cachep = kmem_cache_create("inotify_event_cache", + sizeof(struct inotify_kernel_event), + 0, SLAB_PANIC, NULL, NULL); + + return 0; +} + +module_init(inotify_user_setup); -- cgit v1.2.3-59-g8ed1b From 7c29772288b7026504cfe75bfd90d40fbd1574bf Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 1 Jun 2006 13:11:01 -0700 Subject: [PATCH] inotify (2/5): add name's inode to event handler When an inotify event includes a dentry name, also include the inode associated with that name. Signed-off-by: Amy Griffis Acked-by: Robert Love Acked-by: John McCutchan Signed-off-by: Al Viro --- fs/inotify.c | 13 ++++++++----- fs/inotify_user.c | 3 ++- include/linux/fsnotify.h | 29 ++++++++++++++++------------- include/linux/inotify.h | 7 ++++--- 4 files changed, 30 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index a1bedf3975ca..f25c21801fdc 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -232,7 +232,7 @@ static void remove_watch_no_event(struct inotify_watch *watch, static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih) { remove_watch_no_event(watch, ih); - ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL); + ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL); } /* Kernel API for producing events */ @@ -275,9 +275,10 @@ void inotify_d_move(struct dentry *entry) * @mask: event mask describing this event * @cookie: cookie for synchronization, or zero * @name: filename, if any + * @n_inode: inode associated with name */ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, - const char *name) + const char *name, struct inode *n_inode) { struct inotify_watch *watch, *next; @@ -292,7 +293,8 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, mutex_lock(&ih->mutex); if (watch_mask & IN_ONESHOT) remove_watch_no_event(watch, ih); - ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name); + ih->in_ops->handle_event(watch, watch->wd, mask, cookie, + name, n_inode); mutex_unlock(&ih->mutex); } } @@ -323,7 +325,8 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, if (inotify_inode_watched(inode)) { dget(parent); spin_unlock(&dentry->d_lock); - inotify_inode_queue_event(inode, mask, cookie, name); + inotify_inode_queue_event(inode, mask, cookie, name, + dentry->d_inode); dput(parent); } else spin_unlock(&dentry->d_lock); @@ -407,7 +410,7 @@ void inotify_unmount_inodes(struct list_head *list) struct inotify_handle *ih= watch->ih; mutex_lock(&ih->mutex); ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, - NULL); + NULL, NULL); remove_watch(watch, ih); mutex_unlock(&ih->mutex); } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 845dc79a4e9c..8b83c7190067 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -253,7 +253,8 @@ inotify_dev_get_event(struct inotify_device *dev) * Can sleep (calls kernel_event()). */ static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, - u32 cookie, const char *name) + u32 cookie, const char *name, + struct inode *ignored) { struct inotify_user_watch *watch; struct inotify_device *dev; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 11438eff4d44..a9d30442448f 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -54,16 +54,18 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (isdir) isdir = IN_ISDIR; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name); - inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name); + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, + source); + inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, + source); if (target) { - inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL); + inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(target); } if (source) { - inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL); + inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); } audit_inode_child(old_name, source, old_dir->i_ino); audit_inode_child(new_name, target, new_dir->i_ino); @@ -85,7 +87,7 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) */ static inline void fsnotify_inoderemove(struct inode *inode) { - inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL); + inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); } @@ -95,7 +97,8 @@ static inline void fsnotify_inoderemove(struct inode *inode) static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name); + inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, + dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } @@ -106,7 +109,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, - dentry->d_name.name); + dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino); } @@ -123,7 +126,7 @@ static inline void fsnotify_access(struct dentry *dentry) dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); - inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); } /* @@ -139,7 +142,7 @@ static inline void fsnotify_modify(struct dentry *dentry) dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); - inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); } /* @@ -154,7 +157,7 @@ static inline void fsnotify_open(struct dentry *dentry) mask |= IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); - inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); } /* @@ -172,7 +175,7 @@ static inline void fsnotify_close(struct file *file) mask |= IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, name); - inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); } /* @@ -187,7 +190,7 @@ static inline void fsnotify_xattr(struct dentry *dentry) mask |= IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); - inotify_inode_queue_event(inode, mask, 0, NULL); + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); } /* @@ -234,7 +237,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) if (in_mask) { if (S_ISDIR(inode->i_mode)) in_mask |= IN_ISDIR; - inotify_inode_queue_event(inode, in_mask, 0, NULL); + inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); inotify_dentry_parent_queue_event(dentry, in_mask, 0, dentry->d_name.name); } diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 68b6e0127de4..e7899e7d83ad 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -91,7 +91,7 @@ struct inotify_watch { struct inotify_operations { void (*handle_event)(struct inotify_watch *, u32, u32, u32, - const char *); + const char *, struct inode *); void (*destroy_watch)(struct inotify_watch *); }; @@ -102,7 +102,7 @@ struct inotify_operations { extern void inotify_d_instantiate(struct dentry *, struct inode *); extern void inotify_d_move(struct dentry *); extern void inotify_inode_queue_event(struct inode *, __u32, __u32, - const char *); + const char *, struct inode *); extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, const char *); extern void inotify_unmount_inodes(struct list_head *); @@ -134,7 +134,8 @@ static inline void inotify_d_move(struct dentry *dentry) static inline void inotify_inode_queue_event(struct inode *inode, __u32 mask, __u32 cookie, - const char *filename) + const char *filename, + struct inode *n_inode) { } -- cgit v1.2.3-59-g8ed1b From a9dc971d3fdb857a2bcd6d53238125a2cd31d5f4 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 1 Jun 2006 13:11:03 -0700 Subject: [PATCH] inotify (3/5): add interfaces to kernel API Add inotify_init_watch() so caller can use inotify_watch refcounts before calling inotify_add_watch(). Add inotify_find_watch() to find an existing watch for an (ih,inode) pair. This is similar to inotify_find_update_watch(), but does not update the watch's mask if one is found. Add inotify_rm_watch() to remove a watch via the watch pointer instead of the watch descriptor. Signed-off-by: Amy Griffis Acked-by: Robert Love Acked-by: John McCutchan Signed-off-by: Al Viro --- fs/inotify.c | 64 ++++++++++++++++++++++++++++++++++++++++++++----- fs/inotify_user.c | 1 + include/linux/inotify.h | 20 ++++++++++++++++ 3 files changed, 79 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index f25c21801fdc..8477c4fbecb4 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -467,6 +467,19 @@ struct inotify_handle *inotify_init(const struct inotify_operations *ops) } EXPORT_SYMBOL_GPL(inotify_init); +/** + * inotify_init_watch - initialize an inotify watch + * @watch: watch to initialize + */ +void inotify_init_watch(struct inotify_watch *watch) +{ + INIT_LIST_HEAD(&watch->h_list); + INIT_LIST_HEAD(&watch->i_list); + atomic_set(&watch->count, 0); + get_inotify_watch(watch); /* initial get */ +} +EXPORT_SYMBOL_GPL(inotify_init_watch); + /** * inotify_destroy - clean up and destroy an inotify instance * @ih: inotify handle @@ -514,6 +527,37 @@ void inotify_destroy(struct inotify_handle *ih) } EXPORT_SYMBOL_GPL(inotify_destroy); +/** + * inotify_find_watch - find an existing watch for an (ih,inode) pair + * @ih: inotify handle + * @inode: inode to watch + * @watchp: pointer to existing inotify_watch + * + * Caller must pin given inode (via nameidata). + */ +s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode, + struct inotify_watch **watchp) +{ + struct inotify_watch *old; + int ret = -ENOENT; + + mutex_lock(&inode->inotify_mutex); + mutex_lock(&ih->mutex); + + old = inode_find_handle(inode, ih); + if (unlikely(old)) { + get_inotify_watch(old); /* caller must put watch */ + *watchp = old; + ret = old->wd; + } + + mutex_unlock(&ih->mutex); + mutex_unlock(&inode->inotify_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(inotify_find_watch); + /** * inotify_find_update_watch - find and update the mask of an existing watch * @ih: inotify handle @@ -593,10 +637,6 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, goto out; ret = watch->wd; - atomic_set(&watch->count, 0); - INIT_LIST_HEAD(&watch->h_list); - INIT_LIST_HEAD(&watch->i_list); - /* save a reference to handle and bump the count to make it official */ get_inotify_handle(ih); watch->ih = ih; @@ -607,8 +647,6 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, */ watch->inode = igrab(inode); - get_inotify_watch(watch); /* initial get */ - if (!inotify_inode_watched(inode)) set_dentry_child_flags(inode, 1); @@ -659,6 +697,20 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) } EXPORT_SYMBOL_GPL(inotify_rm_wd); +/** + * inotify_rm_watch - remove a watch from an inotify instance + * @ih: inotify handle + * @watch: watch to remove + * + * Can sleep. + */ +int inotify_rm_watch(struct inotify_handle *ih, + struct inotify_watch *watch) +{ + return inotify_rm_wd(ih, watch->wd); +} +EXPORT_SYMBOL_GPL(inotify_rm_watch); + /* * inotify_setup - core initialization function */ diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 8b83c7190067..9e9931e2badd 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -380,6 +380,7 @@ static int create_watch(struct inotify_device *dev, struct inode *inode, atomic_inc(&dev->user->inotify_watches); + inotify_init_watch(&watch->wdata); ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask); if (ret < 0) free_inotify_user_watch(&watch->wdata); diff --git a/include/linux/inotify.h b/include/linux/inotify.h index e7899e7d83ad..e7e7fb7fc778 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -112,11 +112,15 @@ extern u32 inotify_get_cookie(void); /* Kernel Consumer API */ extern struct inotify_handle *inotify_init(const struct inotify_operations *); +extern void inotify_init_watch(struct inotify_watch *); extern void inotify_destroy(struct inotify_handle *); +extern __s32 inotify_find_watch(struct inotify_handle *, struct inode *, + struct inotify_watch **); extern __s32 inotify_find_update_watch(struct inotify_handle *, struct inode *, u32); extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *, struct inode *, __u32); +extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *); extern int inotify_rm_wd(struct inotify_handle *, __u32); extern void get_inotify_watch(struct inotify_watch *); extern void put_inotify_watch(struct inotify_watch *); @@ -163,10 +167,20 @@ static inline struct inotify_handle *inotify_init(const struct inotify_operation return ERR_PTR(-EOPNOTSUPP); } +static inline void inotify_init_watch(struct inotify_watch *watch) +{ +} + static inline void inotify_destroy(struct inotify_handle *ih) { } +static inline __s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode, + struct inotify_watch **watchp) +{ + return -EOPNOTSUPP; +} + static inline __s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, u32 mask) { @@ -180,6 +194,12 @@ static inline __s32 inotify_add_watch(struct inotify_handle *ih, return -EOPNOTSUPP; } +static inline int inotify_rm_watch(struct inotify_handle *ih, + struct inotify_watch *watch) +{ + return -EOPNOTSUPP; +} + static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd) { return -EOPNOTSUPP; -- cgit v1.2.3-59-g8ed1b From 3ca10067f7f4bfa62a1b0edc84f590261fa02d75 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 1 Jun 2006 13:11:05 -0700 Subject: [PATCH] inotify (4/5): allow watch removal from event handler Allow callers to remove watches from their event handler via inotify_remove_watch_locked(). This functionality can be used to achieve IN_ONESHOT-like functionality for a subset of events in the mask. Signed-off-by: Amy Griffis Acked-by: Robert Love Acked-by: John McCutchan Signed-off-by: Al Viro --- fs/inotify.c | 23 ++++++++++++++--------- include/linux/inotify.h | 7 +++++++ 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index 8477c4fbecb4..723836a1f718 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -207,7 +207,7 @@ static struct inotify_watch *inode_find_handle(struct inode *inode, } /* - * remove_watch_no_event - remove_watch() without the IN_IGNORED event. + * remove_watch_no_event - remove watch without the IN_IGNORED event. * * Callers must hold both inode->inotify_mutex and ih->mutex. */ @@ -223,17 +223,22 @@ static void remove_watch_no_event(struct inotify_watch *watch, idr_remove(&ih->idr, watch->wd); } -/* - * remove_watch - Remove a watch from both the handle and the inode. Sends - * the IN_IGNORED event signifying that the inode is no longer watched. +/** + * inotify_remove_watch_locked - Remove a watch from both the handle and the + * inode. Sends the IN_IGNORED event signifying that the inode is no longer + * watched. May be invoked from a caller's event handler. + * @ih: inotify handle associated with watch + * @watch: watch to remove * * Callers must hold both inode->inotify_mutex and ih->mutex. */ -static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih) +void inotify_remove_watch_locked(struct inotify_handle *ih, + struct inotify_watch *watch) { remove_watch_no_event(watch, ih); ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL); } +EXPORT_SYMBOL_GPL(inotify_remove_watch_locked); /* Kernel API for producing events */ @@ -378,7 +383,7 @@ void inotify_unmount_inodes(struct list_head *list) need_iput_tmp = need_iput; need_iput = NULL; - /* In case the remove_watch() drops a reference. */ + /* In case inotify_remove_watch_locked() drops a reference. */ if (inode != need_iput_tmp) __iget(inode); else @@ -411,7 +416,7 @@ void inotify_unmount_inodes(struct list_head *list) mutex_lock(&ih->mutex); ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); - remove_watch(watch, ih); + inotify_remove_watch_locked(ih, watch); mutex_unlock(&ih->mutex); } mutex_unlock(&inode->inotify_mutex); @@ -434,7 +439,7 @@ void inotify_inode_is_dead(struct inode *inode) list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { struct inotify_handle *ih = watch->ih; mutex_lock(&ih->mutex); - remove_watch(watch, ih); + inotify_remove_watch_locked(ih, watch); mutex_unlock(&ih->mutex); } mutex_unlock(&inode->inotify_mutex); @@ -687,7 +692,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) /* make sure that we did not race */ if (likely(idr_find(&ih->idr, wd) == watch)) - remove_watch(watch, ih); + inotify_remove_watch_locked(ih, watch); mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); diff --git a/include/linux/inotify.h b/include/linux/inotify.h index e7e7fb7fc778..d4f48c6402e6 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -122,6 +122,8 @@ extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *, struct inode *, __u32); extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *); extern int inotify_rm_wd(struct inotify_handle *, __u32); +extern void inotify_remove_watch_locked(struct inotify_handle *, + struct inotify_watch *); extern void get_inotify_watch(struct inotify_watch *); extern void put_inotify_watch(struct inotify_watch *); @@ -205,6 +207,11 @@ static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd) return -EOPNOTSUPP; } +static inline void inotify_remove_watch_locked(struct inotify_handle *ih, + struct inotify_watch *watch) +{ +} + static inline void get_inotify_watch(struct inotify_watch *watch) { } -- cgit v1.2.3-59-g8ed1b From 473ae30bc7b1dda5c5791c773f95e9424ddfead9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 Apr 2006 14:04:08 -0400 Subject: [PATCH] execve argument logging Signed-off-by: Al Viro --- fs/exec.c | 6 ++++++ include/linux/audit.h | 6 +++++- kernel/audit.c | 8 +++++--- kernel/auditsc.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 3a79d97ac234..d07858c0b7c4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1085,6 +1086,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. */ set_fs(USER_DS); + + retval = audit_bprm(bprm); + if (retval) + return retval; + retval = -ENOENT; for (try=0; try<2; try++) { read_lock(&binfmt_lock); diff --git a/include/linux/audit.h b/include/linux/audit.h index e65399bf2710..1a221b65f7b7 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -83,6 +83,7 @@ #define AUDIT_CONFIG_CHANGE 1305 /* Audit system configuration change */ #define AUDIT_SOCKADDR 1306 /* sockaddr copied as syscall arg */ #define AUDIT_CWD 1307 /* Current working directory */ +#define AUDIT_EXECVE 1309 /* execve arguments */ #define AUDIT_IPC_SET_PERM 1311 /* IPC new permissions record type */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ @@ -283,6 +284,7 @@ struct audit_buffer; struct audit_context; struct inode; struct netlink_skb_parms; +struct linux_binprm; #define AUDITSC_INVALID 0 #define AUDITSC_SUCCESS 1 @@ -322,6 +324,7 @@ extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); extern int audit_ipc_obj(struct kern_ipc_perm *ipcp); extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp); +extern int audit_bprm(struct linux_binprm *bprm); extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); @@ -342,6 +345,7 @@ extern int audit_set_macxattr(const char *name); #define audit_get_loginuid(c) ({ -1; }) #define audit_ipc_obj(i) ({ 0; }) #define audit_ipc_set_perm(q,u,g,m,i) ({ 0; }) +#define audit_bprm(p) ({ 0; }) #define audit_socketcall(n,a) ({ 0; }) #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) @@ -364,7 +368,7 @@ extern void audit_log_end(struct audit_buffer *ab); extern void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len); -extern void audit_log_untrustedstring(struct audit_buffer *ab, +extern const char * audit_log_untrustedstring(struct audit_buffer *ab, const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, diff --git a/kernel/audit.c b/kernel/audit.c index bf74bf02aa4b..d09f131b111a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1026,18 +1026,20 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, * or a space. Unescaped strings will start and end with a double quote mark. * Strings that are escaped are printed in hex (2 digits per char). */ -void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) +const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { const unsigned char *p = string; + size_t len = strlen(string); while (*p) { if (*p == '"' || *p < 0x21 || *p > 0x7f) { - audit_log_hex(ab, string, strlen(string)); - return; + audit_log_hex(ab, string, len); + return string + len + 1; } p++; } audit_log_format(ab, "\"%s\"", string); + return p + 1; } /* This is a helper-function to print the escaped d_path */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1c03a4ed1b27..114f921979ec 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "audit.h" @@ -110,6 +111,13 @@ struct audit_aux_data_ipcctl { u32 osid; }; +struct audit_aux_data_execve { + struct audit_aux_data d; + int argc; + int envc; + char mem[0]; +}; + struct audit_aux_data_socketcall { struct audit_aux_data d; int nargs; @@ -667,6 +675,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts kfree(ctx); } break; } + case AUDIT_EXECVE: { + struct audit_aux_data_execve *axi = (void *)aux; + int i; + const char *p; + for (i = 0, p = axi->mem; i < axi->argc; i++) { + audit_log_format(ab, "a%d=", i); + p = audit_log_untrustedstring(ab, p); + audit_log_format(ab, "\n"); + } + break; } case AUDIT_SOCKETCALL: { int i; @@ -1231,6 +1249,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, return 0; } +int audit_bprm(struct linux_binprm *bprm) +{ + struct audit_aux_data_execve *ax; + struct audit_context *context = current->audit_context; + unsigned long p, next; + void *to; + + if (likely(!audit_enabled || !context)) + return 0; + + ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p, + GFP_KERNEL); + if (!ax) + return -ENOMEM; + + ax->argc = bprm->argc; + ax->envc = bprm->envc; + for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) { + struct page *page = bprm->page[p / PAGE_SIZE]; + void *kaddr = kmap(page); + next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); + memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); + to += next - p; + kunmap(page); + } + + ax->d.type = AUDIT_EXECVE; + ax->d.next = context->aux; + context->aux = (void *)ax; + return 0; +} + + /** * audit_socketcall - record audit data for sys_socketcall * @nargs: number of args -- cgit v1.2.3-59-g8ed1b From e0182909297da8d38a5d473ae7bee3d0324632a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 May 2006 08:28:02 -0400 Subject: [PATCH] proc_loginuid_write() uses simple_strtoul() on non-terminated array Signed-off-by: Al Viro --- fs/proc/base.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 6cc77dc3f3ff..6afff725a8c9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1019,8 +1019,8 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, if (current != task) return -EPERM; - if (count > PAGE_SIZE) - count = PAGE_SIZE; + if (count >= PAGE_SIZE) + count = PAGE_SIZE - 1; if (*ppos != 0) { /* No partial writes. */ @@ -1033,6 +1033,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, if (copy_from_user(page, buf, count)) goto out_free_page; + page[count] = '\0'; loginuid = simple_strtoul(page, &tmp, 10); if (tmp == page) { length = -EINVAL; -- cgit v1.2.3-59-g8ed1b From 9c937dcc71021f2dbf78f904f03d962dd9bcc130 Mon Sep 17 00:00:00 2001 From: Amy Griffis Date: Thu, 8 Jun 2006 23:19:31 -0400 Subject: [PATCH] log more info for directory entry change events When an audit event involves changes to a directory entry, include a PATH record for the directory itself. A few other notable changes: - fixed audit_inode_child() hooks in fsnotify_move() - removed unused flags arg from audit_inode() - added audit log routines for logging a portion of a string Here's some sample output. before patch: type=SYSCALL msg=audit(1149821605.320:26): arch=40000003 syscall=39 success=yes exit=0 a0=bf8d3c7c a1=1ff a2=804e1b8 a3=bf8d3c7c items=1 ppid=739 pid=800 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 comm="mkdir" exe="/bin/mkdir" subj=root:system_r:unconfined_t:s0-s0:c0.c255 type=CWD msg=audit(1149821605.320:26): cwd="/root" type=PATH msg=audit(1149821605.320:26): item=0 name="foo" parent=164068 inode=164010 dev=03:00 mode=040755 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_t:s0 after patch: type=SYSCALL msg=audit(1149822032.332:24): arch=40000003 syscall=39 success=yes exit=0 a0=bfdd9c7c a1=1ff a2=804e1b8 a3=bfdd9c7c items=2 ppid=714 pid=777 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 comm="mkdir" exe="/bin/mkdir" subj=root:system_r:unconfined_t:s0-s0:c0.c255 type=CWD msg=audit(1149822032.332:24): cwd="/root" type=PATH msg=audit(1149822032.332:24): item=0 name="/root" inode=164068 dev=03:00 mode=040750 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_dir_t:s0 type=PATH msg=audit(1149822032.332:24): item=1 name="foo" inode=164010 dev=03:00 mode=040755 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_t:s0 Signed-off-by: Amy Griffis Signed-off-by: Al Viro --- fs/namei.c | 2 +- fs/open.c | 4 +- fs/xattr.c | 4 +- include/linux/audit.h | 15 +++--- include/linux/fsnotify.h | 3 +- kernel/audit.c | 54 +++++++++++++++++++-- kernel/audit.h | 3 +- kernel/auditfilter.c | 8 ++- kernel/auditsc.c | 123 ++++++++++++++++++++++++++--------------------- 9 files changed, 142 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index d6e2ee251736..184fe4acf824 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1127,7 +1127,7 @@ out: if (likely(retval == 0)) { if (unlikely(current->audit_context && nd && nd->dentry && nd->dentry->d_inode)) - audit_inode(name, nd->dentry->d_inode, flags); + audit_inode(name, nd->dentry->d_inode); } out_fail: return retval; diff --git a/fs/open.c b/fs/open.c index 317b7c7f38a7..4f178acd4c09 100644 --- a/fs/open.c +++ b/fs/open.c @@ -633,7 +633,7 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) dentry = file->f_dentry; inode = dentry->d_inode; - audit_inode(NULL, inode, 0); + audit_inode(NULL, inode); err = -EROFS; if (IS_RDONLY(inode)) @@ -786,7 +786,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) if (file) { struct dentry * dentry; dentry = file->f_dentry; - audit_inode(NULL, dentry->d_inode, 0); + audit_inode(NULL, dentry->d_inode); error = chown_common(dentry, user, group); fput(file); } diff --git a/fs/xattr.c b/fs/xattr.c index e416190f5e9c..c32f15b5f60f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -242,7 +242,7 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, if (!f) return error; dentry = f->f_dentry; - audit_inode(NULL, dentry->d_inode, 0); + audit_inode(NULL, dentry->d_inode); error = setxattr(dentry, name, value, size, flags); fput(f); return error; @@ -469,7 +469,7 @@ sys_fremovexattr(int fd, char __user *name) if (!f) return error; dentry = f->f_dentry; - audit_inode(NULL, dentry->d_inode, 0); + audit_inode(NULL, dentry->d_inode); error = removexattr(dentry, name); fput(f); return error; diff --git a/include/linux/audit.h b/include/linux/audit.h index c78327507f4e..e1c1dbdf9efb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -310,7 +310,7 @@ extern void audit_syscall_entry(int arch, extern void audit_syscall_exit(int failed, long return_code); extern void __audit_getname(const char *name); extern void audit_putname(const char *name); -extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags); +extern void __audit_inode(const char *name, const struct inode *inode); extern void __audit_inode_child(const char *dname, const struct inode *inode, unsigned long pino); static inline void audit_getname(const char *name) @@ -318,10 +318,9 @@ static inline void audit_getname(const char *name) if (unlikely(current->audit_context)) __audit_getname(name); } -static inline void audit_inode(const char *name, const struct inode *inode, - unsigned flags) { +static inline void audit_inode(const char *name, const struct inode *inode) { if (unlikely(current->audit_context)) - __audit_inode(name, inode, flags); + __audit_inode(name, inode); } static inline void audit_inode_child(const char *dname, const struct inode *inode, @@ -398,9 +397,9 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) #define audit_syscall_exit(f,r) do { ; } while (0) #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) -#define __audit_inode(n,i,f) do { ; } while (0) +#define __audit_inode(n,i) do { ; } while (0) #define __audit_inode_child(d,i,p) do { ; } while (0) -#define audit_inode(n,i,f) do { ; } while (0) +#define audit_inode(n,i) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) @@ -435,6 +434,9 @@ extern void audit_log_hex(struct audit_buffer *ab, size_t len); extern const char * audit_log_untrustedstring(struct audit_buffer *ab, const char *string); +extern const char * audit_log_n_untrustedstring(struct audit_buffer *ab, + size_t n, + const char *string); extern void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct dentry *dentry, @@ -452,6 +454,7 @@ extern int audit_receive_filter(int type, int pid, int uid, int seq, #define audit_log_end(b) do { ; } while (0) #define audit_log_hex(a,b,l) do { ; } while (0) #define audit_log_untrustedstring(a,s) do { ; } while (0) +#define audit_log_n_untrustedstring(a,n,s) do { ; } while (0) #define audit_log_d_path(b,p,d,v) do { ; } while (0) #endif #endif diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a9d30442448f..cc5dec70c32c 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -67,8 +67,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); } - audit_inode_child(old_name, source, old_dir->i_ino); - audit_inode_child(new_name, target, new_dir->i_ino); + audit_inode_child(new_name, source, new_dir->i_ino); } /* diff --git a/kernel/audit.c b/kernel/audit.c index 0fbf1c116363..7dfac7031bd7 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1051,20 +1051,53 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, skb_put(skb, len << 1); /* new string is twice the old string */ } +/* + * Format a string of no more than slen characters into the audit buffer, + * enclosed in quote marks. + */ +static void audit_log_n_string(struct audit_buffer *ab, size_t slen, + const char *string) +{ + int avail, new_len; + unsigned char *ptr; + struct sk_buff *skb; + + BUG_ON(!ab->skb); + skb = ab->skb; + avail = skb_tailroom(skb); + new_len = slen + 3; /* enclosing quotes + null terminator */ + if (new_len > avail) { + avail = audit_expand(ab, new_len); + if (!avail) + return; + } + ptr = skb->tail; + *ptr++ = '"'; + memcpy(ptr, string, slen); + ptr += slen; + *ptr++ = '"'; + *ptr = 0; + skb_put(skb, slen + 2); /* don't include null terminator */ +} + /** - * audit_log_unstrustedstring - log a string that may contain random characters + * audit_log_n_unstrustedstring - log a string that may contain random characters * @ab: audit_buffer + * @len: lenth of string (not including trailing null) * @string: string to be logged * * This code will escape a string that is passed to it if the string * contains a control character, unprintable character, double quote mark, * or a space. Unescaped strings will start and end with a double quote mark. * Strings that are escaped are printed in hex (2 digits per char). + * + * The caller specifies the number of characters in the string to log, which may + * or may not be the entire string. */ -const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) +const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len, + const char *string) { const unsigned char *p = string; - size_t len = strlen(string); while (*p) { if (*p == '"' || *p < 0x21 || *p > 0x7f) { @@ -1073,10 +1106,23 @@ const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *strin } p++; } - audit_log_format(ab, "\"%s\"", string); + audit_log_n_string(ab, len, string); return p + 1; } +/** + * audit_log_unstrustedstring - log a string that may contain random characters + * @ab: audit_buffer + * @string: string to be logged + * + * Same as audit_log_n_unstrustedstring(), except that strlen is used to + * determine string length. + */ +const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string) +{ + return audit_log_n_untrustedstring(ab, strlen(string), string); +} + /* This is a helper-function to print the escaped d_path */ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, struct dentry *dentry, struct vfsmount *vfsmnt) diff --git a/kernel/audit.h b/kernel/audit.h index 58fa44cb8d01..8323e4132a33 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -104,7 +104,8 @@ static inline int audit_hash_ino(u32 ino) } extern int audit_comparator(const u32 left, const u32 op, const u32 right); -extern int audit_compare_dname_path(const char *dname, const char *path); +extern int audit_compare_dname_path(const char *dname, const char *path, + int *dirlen); extern struct sk_buff * audit_make_reply(int pid, int seq, int type, int done, int multi, void *payload, int size); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a536f7148bcd..4c99d2c586ed 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -787,7 +787,7 @@ static void audit_update_watch(struct audit_parent *parent, mutex_lock(&audit_filter_mutex); list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { - if (audit_compare_dname_path(dname, owatch->path)) + if (audit_compare_dname_path(dname, owatch->path, NULL)) continue; /* If the update involves invalidating rules, do the inode-based @@ -1387,7 +1387,8 @@ int audit_comparator(const u32 left, const u32 op, const u32 right) /* Compare given dentry name with last component in given path, * return of 0 indicates a match. */ -int audit_compare_dname_path(const char *dname, const char *path) +int audit_compare_dname_path(const char *dname, const char *path, + int *dirlen) { int dlen, plen; const char *p; @@ -1416,6 +1417,9 @@ int audit_compare_dname_path(const char *dname, const char *path) p++; } + /* return length of path's directory component */ + if (dirlen) + *dirlen = p - path; return strncmp(p, dname, dlen); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 174a3f624892..851ae0217e4b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -82,6 +82,9 @@ extern int audit_enabled; * path_lookup. */ #define AUDIT_NAMES_RESERVED 7 +/* Indicates that audit should log the full pathname. */ +#define AUDIT_NAME_FULL -1 + /* When fs/namei.c:getname() is called, we store the pointer in name and * we don't let putname() free it (instead we free all of the saved * pointers at syscall exit time). @@ -89,8 +92,9 @@ extern int audit_enabled; * Further, in fs/namei.c:path_lookup() we store the inode and device. */ struct audit_names { const char *name; + int name_len; /* number of name's characters to log */ + unsigned name_put; /* call __putname() for this name */ unsigned long ino; - unsigned long pino; dev_t dev; umode_t mode; uid_t uid; @@ -296,12 +300,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_INODE: if (name) - result = (name->ino == f->val || - name->pino == f->val); + result = (name->ino == f->val); else if (ctx) { for (j = 0; j < ctx->name_count; j++) { - if (audit_comparator(ctx->names[j].ino, f->op, f->val) || - audit_comparator(ctx->names[j].pino, f->op, f->val)) { + if (audit_comparator(ctx->names[j].ino, f->op, f->val)) { ++result; break; } @@ -311,8 +313,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_WATCH: if (name && rule->watch->ino != (unsigned long)-1) result = (name->dev == rule->watch->dev && - (name->ino == rule->watch->ino || - name->pino == rule->watch->ino)); + name->ino == rule->watch->ino); break; case AUDIT_LOGINUID: result = 0; @@ -526,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context) #endif for (i = 0; i < context->name_count; i++) { - if (context->names[i].name) + if (context->names[i].name && context->names[i].name_put) __putname(context->names[i].name); } context->name_count = 0; @@ -850,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } } for (i = 0; i < context->name_count; i++) { - unsigned long ino = context->names[i].ino; - unsigned long pino = context->names[i].pino; + struct audit_names *n = &context->names[i]; ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) @@ -859,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_format(ab, "item=%d", i); - audit_log_format(ab, " name="); - if (context->names[i].name) - audit_log_untrustedstring(ab, context->names[i].name); - else - audit_log_format(ab, "(null)"); - - if (pino != (unsigned long)-1) - audit_log_format(ab, " parent=%lu", pino); - if (ino != (unsigned long)-1) - audit_log_format(ab, " inode=%lu", ino); - if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1)) - audit_log_format(ab, " dev=%02x:%02x mode=%#o" - " ouid=%u ogid=%u rdev=%02x:%02x", - MAJOR(context->names[i].dev), - MINOR(context->names[i].dev), - context->names[i].mode, - context->names[i].uid, - context->names[i].gid, - MAJOR(context->names[i].rdev), - MINOR(context->names[i].rdev)); - if (context->names[i].osid != 0) { + if (n->name) { + switch(n->name_len) { + case AUDIT_NAME_FULL: + /* log the full path */ + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, n->name); + break; + case 0: + /* name was specified as a relative path and the + * directory component is the cwd */ + audit_log_d_path(ab, " name=", context->pwd, + context->pwdmnt); + break; + default: + /* log the name's directory component */ + audit_log_format(ab, " name="); + audit_log_n_untrustedstring(ab, n->name_len, + n->name); + } + } else + audit_log_format(ab, " name=(null)"); + + if (n->ino != (unsigned long)-1) { + audit_log_format(ab, " inode=%lu" + " dev=%02x:%02x mode=%#o" + " ouid=%u ogid=%u rdev=%02x:%02x", + n->ino, + MAJOR(n->dev), + MINOR(n->dev), + n->mode, + n->uid, + n->gid, + MAJOR(n->rdev), + MINOR(n->rdev)); + } + if (n->osid != 0) { char *ctx = NULL; u32 len; if (selinux_ctxid_to_string( - context->names[i].osid, &ctx, &len)) { - audit_log_format(ab, " osid=%u", - context->names[i].osid); + n->osid, &ctx, &len)) { + audit_log_format(ab, " osid=%u", n->osid); call_panic = 2; } else audit_log_format(ab, " obj=%s", ctx); @@ -1075,6 +1089,8 @@ void __audit_getname(const char *name) } BUG_ON(context->name_count >= AUDIT_NAMES); context->names[context->name_count].name = name; + context->names[context->name_count].name_len = AUDIT_NAME_FULL; + context->names[context->name_count].name_put = 1; context->names[context->name_count].ino = (unsigned long)-1; ++context->name_count; if (!context->pwd) { @@ -1141,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode) * audit_inode - store the inode and device from a lookup * @name: name being audited * @inode: inode being audited - * @flags: lookup flags (as used in path_lookup()) * * Called from fs/namei.c:path_lookup(). */ -void __audit_inode(const char *name, const struct inode *inode, unsigned flags) +void __audit_inode(const char *name, const struct inode *inode) { int idx; struct audit_context *context = current->audit_context; @@ -1171,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags) ++context->ino_count; #endif } + context->names[idx].ino = inode->i_ino; context->names[idx].dev = inode->i_sb->s_dev; context->names[idx].mode = inode->i_mode; context->names[idx].uid = inode->i_uid; context->names[idx].gid = inode->i_gid; context->names[idx].rdev = inode->i_rdev; audit_inode_context(idx, inode); - if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && - (strcmp(name, ".") != 0)) { - context->names[idx].ino = (unsigned long)-1; - context->names[idx].pino = inode->i_ino; - } else { - context->names[idx].ino = inode->i_ino; - context->names[idx].pino = (unsigned long)-1; - } } /** @@ -1206,34 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode, { int idx; struct audit_context *context = current->audit_context; + const char *found_name = NULL; + int dirlen = 0; if (!context->in_syscall) return; /* determine matching parent */ if (!dname) - goto no_match; + goto update_context; for (idx = 0; idx < context->name_count; idx++) - if (context->names[idx].pino == pino) { + if (context->names[idx].ino == pino) { const char *name = context->names[idx].name; if (!name) continue; - if (audit_compare_dname_path(dname, name) == 0) - goto update_context; + if (audit_compare_dname_path(dname, name, &dirlen) == 0) { + context->names[idx].name_len = dirlen; + found_name = name; + break; + } } -no_match: - /* catch-all in case match not found */ +update_context: idx = context->name_count++; - context->names[idx].name = NULL; - context->names[idx].pino = pino; #if AUDIT_DEBUG context->ino_count++; #endif + /* Re-use the name belonging to the slot for a matching parent directory. + * All names for this context are relinquished in audit_free_names() */ + context->names[idx].name = found_name; + context->names[idx].name_len = AUDIT_NAME_FULL; + context->names[idx].name_put = 0; /* don't call __putname() */ -update_context: if (inode) { context->names[idx].ino = inode->i_ino; context->names[idx].dev = inode->i_sb->s_dev; @@ -1242,7 +1256,8 @@ update_context: context->names[idx].gid = inode->i_gid; context->names[idx].rdev = inode->i_rdev; audit_inode_context(idx, inode); - } + } else + context->names[idx].ino = (unsigned long)-1; } /** -- cgit v1.2.3-59-g8ed1b