From 90204e0b7b51e9f2a6905adca12dc331128602c7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 1 Jun 2006 21:39:38 -0400
Subject: [PATCH] remove config.h from inotify.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/inotify.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 09e00433c78e..71aa1553ef38 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -67,7 +67,6 @@ struct inotify_event {
 
 #include <linux/dcache.h>
 #include <linux/fs.h>
-#include <linux/config.h>
 
 #ifdef CONFIG_INOTIFY
 
-- 
cgit v1.2.3-59-g8ed1b


From 2d9048e201bfb67ba21f05e647b1286b8a4a5667 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 1 Jun 2006 13:10:59 -0700
Subject: [PATCH] inotify (1/5): split kernel API from userspace support

The following series of patches introduces a kernel API for inotify,
making it possible for kernel modules to benefit from inotify's
mechanism for watching inodes.  With these patches, inotify will
maintain for each caller a list of watches (via an embedded struct
inotify_watch), where each inotify_watch is associated with a
corresponding struct inode.  The caller registers an event handler and
specifies for which filesystem events their event handler should be
called per inotify_watch.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Kconfig              |  24 +-
 fs/Makefile             |   1 +
 fs/inotify.c            | 941 ++++++++++++------------------------------------
 fs/inotify_user.c       | 717 ++++++++++++++++++++++++++++++++++++
 include/linux/inotify.h |  76 ++++
 include/linux/sched.h   |   2 +-
 kernel/sysctl.c         |   4 +-
 kernel/user.c           |   2 +-
 8 files changed, 1046 insertions(+), 721 deletions(-)
 create mode 100644 fs/inotify_user.c

(limited to 'include')

diff --git a/fs/Kconfig b/fs/Kconfig
index f9b5842c8d2d..74f11a23622d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -393,18 +393,30 @@ config INOTIFY
 	bool "Inotify file change notification support"
 	default y
 	---help---
-	  Say Y here to enable inotify support and the associated system
-	  calls.  Inotify is a file change notification system and a
-	  replacement for dnotify.  Inotify fixes numerous shortcomings in
-	  dnotify and introduces several new features.  It allows monitoring
-	  of both files and directories via a single open fd.  Other features
-	  include multiple file events, one-shot support, and unmount
+	  Say Y here to enable inotify support.  Inotify is a file change
+	  notification system and a replacement for dnotify.  Inotify fixes
+	  numerous shortcomings in dnotify and introduces several new features
+	  including multiple file events, one-shot support, and unmount
 	  notification.
 
 	  For more information, see Documentation/filesystems/inotify.txt
 
 	  If unsure, say Y.
 
+config INOTIFY_USER
+	bool "Inotify support for userspace"
+	depends on INOTIFY
+	default y
+	---help---
+	  Say Y here to enable inotify support for userspace, including the
+	  associated system calls.  Inotify allows monitoring of both files and
+	  directories via a single open fd.  Events are read from the file
+	  descriptor, which is also select()- and poll()-able.
+
+	  For more information, see Documentation/filesystems/inotify.txt
+
+	  If unsure, say Y.
+
 config QUOTA
 	bool "Quota support"
 	help
diff --git a/fs/Makefile b/fs/Makefile
index 078d3d1191a5..d0ea6bfccf29 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,6 +13,7 @@ obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
 		ioprio.o pnode.o drop_caches.o splice.o sync.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
 obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
 
diff --git a/fs/inotify.c b/fs/inotify.c
index 732ec4bd5774..a1bedf3975ca 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -5,7 +5,10 @@
  *	John McCutchan	<ttb@tentacle.dhs.org>
  *	Robert Love	<rml@novell.com>
  *
+ * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
+ *
  * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -20,35 +23,17 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
-#include <linux/syscalls.h>
-
-#include <asm/ioctls.h>
 
 static atomic_t inotify_cookie;
 
-static kmem_cache_t *watch_cachep __read_mostly;
-static kmem_cache_t *event_cachep __read_mostly;
-
-static struct vfsmount *inotify_mnt __read_mostly;
-
-/* these are configurable via /proc/sys/fs/inotify/ */
-int inotify_max_user_instances __read_mostly;
-int inotify_max_user_watches __read_mostly;
-int inotify_max_queued_events __read_mostly;
-
 /*
  * Lock ordering:
  *
@@ -56,327 +41,108 @@ int inotify_max_queued_events __read_mostly;
  * iprune_mutex (synchronize shrink_icache_memory())
  * 	inode_lock (protects the super_block->s_inodes list)
  * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
- * 		inotify_dev->mutex (protects inotify_device and watches->d_list)
+ * 		inotify_handle->mutex (protects inotify_handle and watches->h_list)
+ *
+ * The inode->inotify_mutex and inotify_handle->mutex are held during execution
+ * of a caller's event handler.  Thus, the caller must not hold any locks
+ * taken in their event handler while calling any of the published inotify
+ * interfaces.
  */
 
 /*
- * Lifetimes of the three main data structures--inotify_device, inode, and
+ * Lifetimes of the three main data structures--inotify_handle, inode, and
  * inotify_watch--are managed by reference count.
  *
- * inotify_device: Lifetime is from inotify_init() until release.  Additional
- * references can bump the count via get_inotify_dev() and drop the count via
- * put_inotify_dev().
+ * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
+ * Additional references can bump the count via get_inotify_handle() and drop
+ * the count via put_inotify_handle().
  *
- * inotify_watch: Lifetime is from create_watch() to destory_watch().
- * Additional references can bump the count via get_inotify_watch() and drop
- * the count via put_inotify_watch().
+ * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
+ * to remove_watch_no_event().  Additional references can bump the count via
+ * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
+ * is responsible for the final put after receiving IN_IGNORED, or when using
+ * IN_ONESHOT after receiving the first event.  Inotify does the final put if
+ * inotify_destroy() is called.
  *
  * inode: Pinned so long as the inode is associated with a watch, from
- * create_watch() to put_inotify_watch().
+ * inotify_add_watch() to the final put_inotify_watch().
  */
 
 /*
- * struct inotify_device - represents an inotify instance
+ * struct inotify_handle - represents an inotify instance
  *
  * This structure is protected by the mutex 'mutex'.
  */
-struct inotify_device {
-	wait_queue_head_t 	wq;		/* wait queue for i/o */
+struct inotify_handle {
 	struct idr		idr;		/* idr mapping wd -> watch */
 	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head 	events;		/* list of queued events */
 	struct list_head	watches;	/* list of watches */
 	atomic_t		count;		/* reference count */
-	struct user_struct	*user;		/* user who opened this dev */
-	unsigned int		queue_size;	/* size of the queue (bytes) */
-	unsigned int		event_count;	/* number of pending events */
-	unsigned int		max_events;	/* maximum number of events */
 	u32			last_wd;	/* the last wd allocated */
+	const struct inotify_operations *in_ops; /* inotify caller operations */
 };
 
-/*
- * struct inotify_kernel_event - An inotify event, originating from a watch and
- * queued for user-space.  A list of these is attached to each instance of the
- * device.  In read(), this list is walked and all events that can fit in the
- * buffer are returned.
- *
- * Protected by dev->mutex of the device in which we are queued.
- */
-struct inotify_kernel_event {
-	struct inotify_event	event;	/* the user-space event */
-	struct list_head        list;	/* entry in inotify_device's list */
-	char			*name;	/* filename, if any */
-};
-
-/*
- * struct inotify_watch - represents a watch request on a specific inode
- *
- * d_list is protected by dev->mutex of the associated watch->dev.
- * i_list and mask are protected by inode->inotify_mutex of the associated inode.
- * dev, inode, and wd are never written to once the watch is created.
- */
-struct inotify_watch {
-	struct list_head	d_list;	/* entry in inotify_device's list */
-	struct list_head	i_list;	/* entry in inode's list */
-	atomic_t		count;	/* reference count */
-	struct inotify_device	*dev;	/* associated device */
-	struct inode		*inode;	/* associated inode */
-	s32 			wd;	/* watch descriptor */
-	u32			mask;	/* event mask for this watch */
-};
-
-#ifdef CONFIG_SYSCTL
-
-#include <linux/sysctl.h>
-
-static int zero;
-
-ctl_table inotify_table[] = {
-	{
-		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
-		.procname	= "max_user_instances",
-		.data		= &inotify_max_user_instances,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero,
-	},
-	{
-		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
-		.procname	= "max_user_watches",
-		.data		= &inotify_max_user_watches,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &zero, 
-	},
-	{
-		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
-		.procname	= "max_queued_events",
-		.data		= &inotify_max_queued_events,
-		.maxlen		= sizeof(int),
-		.mode		= 0644, 
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec, 
-		.extra1		= &zero
-	},
-	{ .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-static inline void get_inotify_dev(struct inotify_device *dev)
+static inline void get_inotify_handle(struct inotify_handle *ih)
 {
-	atomic_inc(&dev->count);
+	atomic_inc(&ih->count);
 }
 
-static inline void put_inotify_dev(struct inotify_device *dev)
+static inline void put_inotify_handle(struct inotify_handle *ih)
 {
-	if (atomic_dec_and_test(&dev->count)) {
-		atomic_dec(&dev->user->inotify_devs);
-		free_uid(dev->user);
-		idr_destroy(&dev->idr);
-		kfree(dev);
+	if (atomic_dec_and_test(&ih->count)) {
+		idr_destroy(&ih->idr);
+		kfree(ih);
 	}
 }
 
-static inline void get_inotify_watch(struct inotify_watch *watch)
+/**
+ * get_inotify_watch - grab a reference to an inotify_watch
+ * @watch: watch to grab
+ */
+void get_inotify_watch(struct inotify_watch *watch)
 {
 	atomic_inc(&watch->count);
 }
+EXPORT_SYMBOL_GPL(get_inotify_watch);
 
-/*
+/**
  * put_inotify_watch - decrements the ref count on a given watch.  cleans up
- * the watch and its references if the count reaches zero.
+ * watch references if the count reaches zero.  inotify_watch is freed by
+ * inotify callers via the destroy_watch() op.
+ * @watch: watch to release
  */
-static inline void put_inotify_watch(struct inotify_watch *watch)
+void put_inotify_watch(struct inotify_watch *watch)
 {
 	if (atomic_dec_and_test(&watch->count)) {
-		put_inotify_dev(watch->dev);
-		iput(watch->inode);
-		kmem_cache_free(watch_cachep, watch);
-	}
-}
-
-/*
- * kernel_event - create a new kernel event with the given parameters
- *
- * This function can sleep.
- */
-static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
-						  const char *name)
-{
-	struct inotify_kernel_event *kevent;
-
-	kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
-	if (unlikely(!kevent))
-		return NULL;
-
-	/* we hand this out to user-space, so zero it just in case */
-	memset(&kevent->event, 0, sizeof(struct inotify_event));
-
-	kevent->event.wd = wd;
-	kevent->event.mask = mask;
-	kevent->event.cookie = cookie;
-
-	INIT_LIST_HEAD(&kevent->list);
-
-	if (name) {
-		size_t len, rem, event_size = sizeof(struct inotify_event);
-
-		/*
-		 * We need to pad the filename so as to properly align an
-		 * array of inotify_event structures.  Because the structure is
-		 * small and the common case is a small filename, we just round
-		 * up to the next multiple of the structure's sizeof.  This is
-		 * simple and safe for all architectures.
-		 */
-		len = strlen(name) + 1;
-		rem = event_size - len;
-		if (len > event_size) {
-			rem = event_size - (len % event_size);
-			if (len % event_size == 0)
-				rem = 0;
-		}
-
-		kevent->name = kmalloc(len + rem, GFP_KERNEL);
-		if (unlikely(!kevent->name)) {
-			kmem_cache_free(event_cachep, kevent);
-			return NULL;
-		}
-		memcpy(kevent->name, name, len);
-		if (rem)
-			memset(kevent->name + len, 0, rem);		
-		kevent->event.len = len + rem;
-	} else {
-		kevent->event.len = 0;
-		kevent->name = NULL;
-	}
-
-	return kevent;
-}
-
-/*
- * inotify_dev_get_event - return the next event in the given dev's queue
- *
- * Caller must hold dev->mutex.
- */
-static inline struct inotify_kernel_event *
-inotify_dev_get_event(struct inotify_device *dev)
-{
-	return list_entry(dev->events.next, struct inotify_kernel_event, list);
-}
-
-/*
- * inotify_dev_queue_event - add a new event to the given device
- *
- * Caller must hold dev->mutex.  Can sleep (calls kernel_event()).
- */
-static void inotify_dev_queue_event(struct inotify_device *dev,
-				    struct inotify_watch *watch, u32 mask,
-				    u32 cookie, const char *name)
-{
-	struct inotify_kernel_event *kevent, *last;
-
-	/* coalescing: drop this event if it is a dupe of the previous */
-	last = inotify_dev_get_event(dev);
-	if (last && last->event.mask == mask && last->event.wd == watch->wd &&
-			last->event.cookie == cookie) {
-		const char *lastname = last->name;
-
-		if (!name && !lastname)
-			return;
-		if (name && lastname && !strcmp(lastname, name))
-			return;
-	}
-
-	/* the queue overflowed and we already sent the Q_OVERFLOW event */
-	if (unlikely(dev->event_count > dev->max_events))
-		return;
-
-	/* if the queue overflows, we need to notify user space */
-	if (unlikely(dev->event_count == dev->max_events))
-		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
-	else
-		kevent = kernel_event(watch->wd, mask, cookie, name);
-
-	if (unlikely(!kevent))
-		return;
-
-	/* queue the event and wake up anyone waiting */
-	dev->event_count++;
-	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
-	list_add_tail(&kevent->list, &dev->events);
-	wake_up_interruptible(&dev->wq);
-}
-
-/*
- * remove_kevent - cleans up and ultimately frees the given kevent
- *
- * Caller must hold dev->mutex.
- */
-static void remove_kevent(struct inotify_device *dev,
-			  struct inotify_kernel_event *kevent)
-{
-	list_del(&kevent->list);
+		struct inotify_handle *ih = watch->ih;
 
-	dev->event_count--;
-	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
-
-	kfree(kevent->name);
-	kmem_cache_free(event_cachep, kevent);
-}
-
-/*
- * inotify_dev_event_dequeue - destroy an event on the given device
- *
- * Caller must hold dev->mutex.
- */
-static void inotify_dev_event_dequeue(struct inotify_device *dev)
-{
-	if (!list_empty(&dev->events)) {
-		struct inotify_kernel_event *kevent;
-		kevent = inotify_dev_get_event(dev);
-		remove_kevent(dev, kevent);
+		iput(watch->inode);
+		ih->in_ops->destroy_watch(watch);
+		put_inotify_handle(ih);
 	}
 }
+EXPORT_SYMBOL_GPL(put_inotify_watch);
 
 /*
- * inotify_dev_get_wd - returns the next WD for use by the given dev
+ * inotify_handle_get_wd - returns the next WD for use by the given handle
  *
- * Callers must hold dev->mutex.  This function can sleep.
+ * Callers must hold ih->mutex.  This function can sleep.
  */
-static int inotify_dev_get_wd(struct inotify_device *dev,
-			      struct inotify_watch *watch)
+static int inotify_handle_get_wd(struct inotify_handle *ih,
+				 struct inotify_watch *watch)
 {
 	int ret;
 
 	do {
-		if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
+		if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL)))
 			return -ENOSPC;
-		ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd);
+		ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
 	} while (ret == -EAGAIN);
 
-	return ret;
-}
+	if (likely(!ret))
+		ih->last_wd = watch->wd;
 
-/*
- * find_inode - resolve a user-given path to a specific inode and return a nd
- */
-static int find_inode(const char __user *dirname, struct nameidata *nd,
-		      unsigned flags)
-{
-	int error;
-
-	error = __user_walk(dirname, flags, nd);
-	if (error)
-		return error;
-	/* you can only watch an inode if you have read permissions on it */
-	error = vfs_permission(nd, MAY_READ);
-	if (error) 
-		path_release(nd);
-	return error;
+	return ret;
 }
 
 /*
@@ -422,67 +188,18 @@ static void set_dentry_child_flags(struct inode *inode, int watched)
 }
 
 /*
- * create_watch - creates a watch on the given device.
- *
- * Callers must hold dev->mutex.  Calls inotify_dev_get_wd() so may sleep.
- * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
- */
-static struct inotify_watch *create_watch(struct inotify_device *dev,
-					  u32 mask, struct inode *inode)
-{
-	struct inotify_watch *watch;
-	int ret;
-
-	if (atomic_read(&dev->user->inotify_watches) >=
-			inotify_max_user_watches)
-		return ERR_PTR(-ENOSPC);
-
-	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
-	if (unlikely(!watch))
-		return ERR_PTR(-ENOMEM);
-
-	ret = inotify_dev_get_wd(dev, watch);
-	if (unlikely(ret)) {
-		kmem_cache_free(watch_cachep, watch);
-		return ERR_PTR(ret);
-	}
-
-	dev->last_wd = watch->wd;
-	watch->mask = mask;
-	atomic_set(&watch->count, 0);
-	INIT_LIST_HEAD(&watch->d_list);
-	INIT_LIST_HEAD(&watch->i_list);
-
-	/* save a reference to device and bump the count to make it official */
-	get_inotify_dev(dev);
-	watch->dev = dev;
-
-	/*
-	 * Save a reference to the inode and bump the ref count to make it
-	 * official.  We hold a reference to nameidata, which makes this safe.
-	 */
-	watch->inode = igrab(inode);
-
-	/* bump our own count, corresponding to our entry in dev->watches */
-	get_inotify_watch(watch);
-
-	atomic_inc(&dev->user->inotify_watches);
-
-	return watch;
-}
-
-/*
- * inotify_find_dev - find the watch associated with the given inode and dev
+ * inode_find_handle - find the watch associated with the given inode and
+ * handle
  *
  * Callers must hold inode->inotify_mutex.
  */
-static struct inotify_watch *inode_find_dev(struct inode *inode,
-					    struct inotify_device *dev)
+static struct inotify_watch *inode_find_handle(struct inode *inode,
+					       struct inotify_handle *ih)
 {
 	struct inotify_watch *watch;
 
 	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
-		if (watch->dev == dev)
+		if (watch->ih == ih)
 			return watch;
 	}
 
@@ -491,39 +208,34 @@ static struct inotify_watch *inode_find_dev(struct inode *inode,
 
 /*
  * remove_watch_no_event - remove_watch() without the IN_IGNORED event.
+ *
+ * Callers must hold both inode->inotify_mutex and ih->mutex.
  */
 static void remove_watch_no_event(struct inotify_watch *watch,
-				  struct inotify_device *dev)
+				  struct inotify_handle *ih)
 {
 	list_del(&watch->i_list);
-	list_del(&watch->d_list);
+	list_del(&watch->h_list);
 
 	if (!inotify_inode_watched(watch->inode))
 		set_dentry_child_flags(watch->inode, 0);
 
-	atomic_dec(&dev->user->inotify_watches);
-	idr_remove(&dev->idr, watch->wd);
-	put_inotify_watch(watch);
+	idr_remove(&ih->idr, watch->wd);
 }
 
 /*
- * remove_watch - Remove a watch from both the device and the inode.  Sends
- * the IN_IGNORED event to the given device signifying that the inode is no
- * longer watched.
- *
- * Callers must hold both inode->inotify_mutex and dev->mutex.  We drop a
- * reference to the inode before returning.
+ * remove_watch - Remove a watch from both the handle and the inode.  Sends
+ * the IN_IGNORED event signifying that the inode is no longer watched.
  *
- * The inode is not iput() so as to remain atomic.  If the inode needs to be
- * iput(), the call returns one.  Otherwise, it returns zero.
+ * Callers must hold both inode->inotify_mutex and ih->mutex.
  */
-static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
+static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih)
 {
-	inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
-	remove_watch_no_event(watch, dev);
+	remove_watch_no_event(watch, ih);
+	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL);
 }
 
-/* Kernel API */
+/* Kernel API for producing events */
 
 /*
  * inotify_d_instantiate - instantiate dcache entry for inode
@@ -576,14 +288,12 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
 		u32 watch_mask = watch->mask;
 		if (watch_mask & mask) {
-			struct inotify_device *dev = watch->dev;
-			get_inotify_watch(watch);
-			mutex_lock(&dev->mutex);
-			inotify_dev_queue_event(dev, watch, mask, cookie, name);
+			struct inotify_handle *ih= watch->ih;
+			mutex_lock(&ih->mutex);
 			if (watch_mask & IN_ONESHOT)
-				remove_watch_no_event(watch, dev);
-			mutex_unlock(&dev->mutex);
-			put_inotify_watch(watch);
+				remove_watch_no_event(watch, ih);
+			ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name);
+			mutex_unlock(&ih->mutex);
 		}
 	}
 	mutex_unlock(&inode->inotify_mutex);
@@ -694,11 +404,12 @@ void inotify_unmount_inodes(struct list_head *list)
 		mutex_lock(&inode->inotify_mutex);
 		watches = &inode->inotify_watches;
 		list_for_each_entry_safe(watch, next_w, watches, i_list) {
-			struct inotify_device *dev = watch->dev;
-			mutex_lock(&dev->mutex);
-			inotify_dev_queue_event(dev, watch, IN_UNMOUNT,0,NULL);
-			remove_watch(watch, dev);
-			mutex_unlock(&dev->mutex);
+			struct inotify_handle *ih= watch->ih;
+			mutex_lock(&ih->mutex);
+			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
+						 NULL);
+			remove_watch(watch, ih);
+			mutex_unlock(&ih->mutex);
 		}
 		mutex_unlock(&inode->inotify_mutex);
 		iput(inode);		
@@ -718,432 +429,240 @@ void inotify_inode_is_dead(struct inode *inode)
 
 	mutex_lock(&inode->inotify_mutex);
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		struct inotify_device *dev = watch->dev;
-		mutex_lock(&dev->mutex);
-		remove_watch(watch, dev);
-		mutex_unlock(&dev->mutex);
+		struct inotify_handle *ih = watch->ih;
+		mutex_lock(&ih->mutex);
+		remove_watch(watch, ih);
+		mutex_unlock(&ih->mutex);
 	}
 	mutex_unlock(&inode->inotify_mutex);
 }
 EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
 
-/* Device Interface */
-
-static unsigned int inotify_poll(struct file *file, poll_table *wait)
-{
-	struct inotify_device *dev = file->private_data;
-	int ret = 0;
-
-	poll_wait(file, &dev->wq, wait);
-	mutex_lock(&dev->mutex);
-	if (!list_empty(&dev->events))
-		ret = POLLIN | POLLRDNORM;
-	mutex_unlock(&dev->mutex);
-
-	return ret;
-}
+/* Kernel Consumer API */
 
-static ssize_t inotify_read(struct file *file, char __user *buf,
-			    size_t count, loff_t *pos)
+/**
+ * inotify_init - allocate and initialize an inotify instance
+ * @ops: caller's inotify operations
+ */
+struct inotify_handle *inotify_init(const struct inotify_operations *ops)
 {
-	size_t event_size = sizeof (struct inotify_event);
-	struct inotify_device *dev;
-	char __user *start;
-	int ret;
-	DEFINE_WAIT(wait);
-
-	start = buf;
-	dev = file->private_data;
-
-	while (1) {
-		int events;
-
-		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
+	struct inotify_handle *ih;
 
-		mutex_lock(&dev->mutex);
-		events = !list_empty(&dev->events);
-		mutex_unlock(&dev->mutex);
-		if (events) {
-			ret = 0;
-			break;
-		}
-
-		if (file->f_flags & O_NONBLOCK) {
-			ret = -EAGAIN;
-			break;
-		}
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-
-		schedule();
-	}
-
-	finish_wait(&dev->wq, &wait);
-	if (ret)
-		return ret;
-
-	mutex_lock(&dev->mutex);
-	while (1) {
-		struct inotify_kernel_event *kevent;
-
-		ret = buf - start;
-		if (list_empty(&dev->events))
-			break;
-
-		kevent = inotify_dev_get_event(dev);
-		if (event_size + kevent->event.len > count)
-			break;
-
-		if (copy_to_user(buf, &kevent->event, event_size)) {
-			ret = -EFAULT;
-			break;
-		}
-		buf += event_size;
-		count -= event_size;
-
-		if (kevent->name) {
-			if (copy_to_user(buf, kevent->name, kevent->event.len)){
-				ret = -EFAULT;
-				break;
-			}
-			buf += kevent->event.len;
-			count -= kevent->event.len;
-		}
+	ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
+	if (unlikely(!ih))
+		return ERR_PTR(-ENOMEM);
 
-		remove_kevent(dev, kevent);
-	}
-	mutex_unlock(&dev->mutex);
+	idr_init(&ih->idr);
+	INIT_LIST_HEAD(&ih->watches);
+	mutex_init(&ih->mutex);
+	ih->last_wd = 0;
+	ih->in_ops = ops;
+	atomic_set(&ih->count, 0);
+	get_inotify_handle(ih);
 
-	return ret;
+	return ih;
 }
+EXPORT_SYMBOL_GPL(inotify_init);
 
-static int inotify_release(struct inode *ignored, struct file *file)
+/**
+ * inotify_destroy - clean up and destroy an inotify instance
+ * @ih: inotify handle
+ */
+void inotify_destroy(struct inotify_handle *ih)
 {
-	struct inotify_device *dev = file->private_data;
-
 	/*
-	 * Destroy all of the watches on this device.  Unfortunately, not very
+	 * Destroy all of the watches for this handle. Unfortunately, not very
 	 * pretty.  We cannot do a simple iteration over the list, because we
 	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_mutex before dev->mutex.  The following works.
+	 * hold inode->inotify_mutex before ih->mutex.  The following works.
 	 */
 	while (1) {
 		struct inotify_watch *watch;
 		struct list_head *watches;
 		struct inode *inode;
 
-		mutex_lock(&dev->mutex);
-		watches = &dev->watches;
+		mutex_lock(&ih->mutex);
+		watches = &ih->watches;
 		if (list_empty(watches)) {
-			mutex_unlock(&dev->mutex);
+			mutex_unlock(&ih->mutex);
 			break;
 		}
-		watch = list_entry(watches->next, struct inotify_watch, d_list);
+		watch = list_entry(watches->next, struct inotify_watch, h_list);
 		get_inotify_watch(watch);
-		mutex_unlock(&dev->mutex);
+		mutex_unlock(&ih->mutex);
 
 		inode = watch->inode;
 		mutex_lock(&inode->inotify_mutex);
-		mutex_lock(&dev->mutex);
+		mutex_lock(&ih->mutex);
 
 		/* make sure we didn't race with another list removal */
-		if (likely(idr_find(&dev->idr, watch->wd)))
-			remove_watch_no_event(watch, dev);
+		if (likely(idr_find(&ih->idr, watch->wd))) {
+			remove_watch_no_event(watch, ih);
+			put_inotify_watch(watch);
+		}
 
-		mutex_unlock(&dev->mutex);
+		mutex_unlock(&ih->mutex);
 		mutex_unlock(&inode->inotify_mutex);
 		put_inotify_watch(watch);
 	}
 
-	/* destroy all of the events on this device */
-	mutex_lock(&dev->mutex);
-	while (!list_empty(&dev->events))
-		inotify_dev_event_dequeue(dev);
-	mutex_unlock(&dev->mutex);
-
-	/* free this device: the put matching the get in inotify_init() */
-	put_inotify_dev(dev);
-
-	return 0;
+	/* free this handle: the put matching the get in inotify_init() */
+	put_inotify_handle(ih);
 }
+EXPORT_SYMBOL_GPL(inotify_destroy);
 
-/*
- * inotify_ignore - remove a given wd from this inotify instance.
+/**
+ * inotify_find_update_watch - find and update the mask of an existing watch
+ * @ih: inotify handle
+ * @inode: inode's watch to update
+ * @mask: mask of events to watch
  *
- * Can sleep.
+ * Caller must pin given inode (via nameidata).
  */
-static int inotify_ignore(struct inotify_device *dev, s32 wd)
+s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
+			      u32 mask)
 {
-	struct inotify_watch *watch;
-	struct inode *inode;
+	struct inotify_watch *old;
+	int mask_add = 0;
+	int ret;
 
-	mutex_lock(&dev->mutex);
-	watch = idr_find(&dev->idr, wd);
-	if (unlikely(!watch)) {
-		mutex_unlock(&dev->mutex);
+	if (mask & IN_MASK_ADD)
+		mask_add = 1;
+
+	/* don't allow invalid bits: we don't want flags set */
+	mask &= IN_ALL_EVENTS | IN_ONESHOT;
+	if (unlikely(!mask))
 		return -EINVAL;
-	}
-	get_inotify_watch(watch);
-	inode = watch->inode;
-	mutex_unlock(&dev->mutex);
 
 	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&dev->mutex);
-
-	/* make sure that we did not race */
-	if (likely(idr_find(&dev->idr, wd) == watch))
-		remove_watch(watch, dev);
-
-	mutex_unlock(&dev->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	put_inotify_watch(watch);
-
-	return 0;
-}
-
-static long inotify_ioctl(struct file *file, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct inotify_device *dev;
-	void __user *p;
-	int ret = -ENOTTY;
+	mutex_lock(&ih->mutex);
 
-	dev = file->private_data;
-	p = (void __user *) arg;
-
-	switch (cmd) {
-	case FIONREAD:
-		ret = put_user(dev->queue_size, (int __user *) p);
-		break;
-	}
-
-	return ret;
-}
-
-static const struct file_operations inotify_fops = {
-	.poll           = inotify_poll,
-	.read           = inotify_read,
-	.release        = inotify_release,
-	.unlocked_ioctl = inotify_ioctl,
-	.compat_ioctl	= inotify_ioctl,
-};
-
-asmlinkage long sys_inotify_init(void)
-{
-	struct inotify_device *dev;
-	struct user_struct *user;
-	struct file *filp;	
-	int fd, ret;
-
-	fd = get_unused_fd();
-	if (fd < 0)
-		return fd;
-
-	filp = get_empty_filp();
-	if (!filp) {
-		ret = -ENFILE;
-		goto out_put_fd;
-	}
-
-	user = get_uid(current->user);
-	if (unlikely(atomic_read(&user->inotify_devs) >=
-			inotify_max_user_instances)) {
-		ret = -EMFILE;
-		goto out_free_uid;
-	}
-
-	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
-	if (unlikely(!dev)) {
-		ret = -ENOMEM;
-		goto out_free_uid;
+	/*
+	 * Handle the case of re-adding a watch on an (inode,ih) pair that we
+	 * are already watching.  We just update the mask and return its wd.
+	 */
+	old = inode_find_handle(inode, ih);
+	if (unlikely(!old)) {
+		ret = -ENOENT;
+		goto out;
 	}
 
-	filp->f_op = &inotify_fops;
-	filp->f_vfsmnt = mntget(inotify_mnt);
-	filp->f_dentry = dget(inotify_mnt->mnt_root);
-	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
-	filp->f_mode = FMODE_READ;
-	filp->f_flags = O_RDONLY;
-	filp->private_data = dev;
-
-	idr_init(&dev->idr);
-	INIT_LIST_HEAD(&dev->events);
-	INIT_LIST_HEAD(&dev->watches);
-	init_waitqueue_head(&dev->wq);
-	mutex_init(&dev->mutex);
-	dev->event_count = 0;
-	dev->queue_size = 0;
-	dev->max_events = inotify_max_queued_events;
-	dev->user = user;
-	dev->last_wd = 0;
-	atomic_set(&dev->count, 0);
-
-	get_inotify_dev(dev);
-	atomic_inc(&user->inotify_devs);
-	fd_install(fd, filp);
-
-	return fd;
-out_free_uid:
-	free_uid(user);
-	put_filp(filp);
-out_put_fd:
-	put_unused_fd(fd);
+	if (mask_add)
+		old->mask |= mask;
+	else
+		old->mask = mask;
+	ret = old->wd;
+out:
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(inotify_find_update_watch);
 
-asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
+/**
+ * inotify_add_watch - add a watch to an inotify instance
+ * @ih: inotify handle
+ * @watch: caller allocated watch structure
+ * @inode: inode to watch
+ * @mask: mask of events to watch
+ *
+ * Caller must pin given inode (via nameidata).
+ * Caller must ensure it only calls inotify_add_watch() once per watch.
+ * Calls inotify_handle_get_wd() so may sleep.
+ */
+s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
+		      struct inode *inode, u32 mask)
 {
-	struct inotify_watch *watch, *old;
-	struct inode *inode;
-	struct inotify_device *dev;
-	struct nameidata nd;
-	struct file *filp;
-	int ret, fput_needed;
-	int mask_add = 0;
-	unsigned flags = 0;
-
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
-		return -EBADF;
+	int ret = 0;
 
-	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
-		goto fput_and_out;
-	}
+	/* don't allow invalid bits: we don't want flags set */
+	mask &= IN_ALL_EVENTS | IN_ONESHOT;
+	if (unlikely(!mask))
+		return -EINVAL;
+	watch->mask = mask;
 
-	if (!(mask & IN_DONT_FOLLOW))
-		flags |= LOOKUP_FOLLOW;
-	if (mask & IN_ONLYDIR)
-		flags |= LOOKUP_DIRECTORY;
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
 
-	ret = find_inode(path, &nd, flags);
+	/* Initialize a new watch */
+	ret = inotify_handle_get_wd(ih, watch);
 	if (unlikely(ret))
-		goto fput_and_out;
-
-	/* inode held in place by reference to nd; dev by fget on fd */
-	inode = nd.dentry->d_inode;
-	dev = filp->private_data;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&dev->mutex);
+		goto out;
+	ret = watch->wd;
 
-	if (mask & IN_MASK_ADD)
-		mask_add = 1;
+	atomic_set(&watch->count, 0);
+	INIT_LIST_HEAD(&watch->h_list);
+	INIT_LIST_HEAD(&watch->i_list);
 
-	/* don't let user-space set invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	/* save a reference to handle and bump the count to make it official */
+	get_inotify_handle(ih);
+	watch->ih = ih;
 
 	/*
-	 * Handle the case of re-adding a watch on an (inode,dev) pair that we
-	 * are already watching.  We just update the mask and return its wd.
+	 * Save a reference to the inode and bump the ref count to make it
+	 * official.  We hold a reference to nameidata, which makes this safe.
 	 */
-	old = inode_find_dev(inode, dev);
-	if (unlikely(old)) {
-		if (mask_add)
-			old->mask |= mask;
-		else
-			old->mask = mask;
-		ret = old->wd;
-		goto out;
-	}
+	watch->inode = igrab(inode);
 
-	watch = create_watch(dev, mask, inode);
-	if (unlikely(IS_ERR(watch))) {
-		ret = PTR_ERR(watch);
-		goto out;
-	}
+	get_inotify_watch(watch); /* initial get */
 
 	if (!inotify_inode_watched(inode))
 		set_dentry_child_flags(inode, 1);
 
-	/* Add the watch to the device's and the inode's list */
-	list_add(&watch->d_list, &dev->watches);
+	/* Add the watch to the handle's and the inode's list */
+	list_add(&watch->h_list, &ih->watches);
 	list_add(&watch->i_list, &inode->inotify_watches);
-	ret = watch->wd;
 out:
-	mutex_unlock(&dev->mutex);
+	mutex_unlock(&ih->mutex);
 	mutex_unlock(&inode->inotify_mutex);
-	path_release(&nd);
-fput_and_out:
-	fput_light(filp, fput_needed);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(inotify_add_watch);
 
-asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+/**
+ * inotify_rm_wd - remove a watch from an inotify instance
+ * @ih: inotify handle
+ * @wd: watch descriptor to remove
+ *
+ * Can sleep.
+ */
+int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 {
-	struct file *filp;
-	struct inotify_device *dev;
-	int ret, fput_needed;
-
-	filp = fget_light(fd, &fput_needed);
-	if (unlikely(!filp))
-		return -EBADF;
+	struct inotify_watch *watch;
+	struct inode *inode;
 
-	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
-		goto out;
+	mutex_lock(&ih->mutex);
+	watch = idr_find(&ih->idr, wd);
+	if (unlikely(!watch)) {
+		mutex_unlock(&ih->mutex);
+		return -EINVAL;
 	}
+	get_inotify_watch(watch);
+	inode = watch->inode;
+	mutex_unlock(&ih->mutex);
 
-	dev = filp->private_data;
-	ret = inotify_ignore(dev, wd);
+	mutex_lock(&inode->inotify_mutex);
+	mutex_lock(&ih->mutex);
 
-out:
-	fput_light(filp, fput_needed);
-	return ret;
-}
+	/* make sure that we did not race */
+	if (likely(idr_find(&ih->idr, wd) == watch))
+		remove_watch(watch, ih);
 
-static struct super_block *
-inotify_get_sb(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data)
-{
-    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
-}
+	mutex_unlock(&ih->mutex);
+	mutex_unlock(&inode->inotify_mutex);
+	put_inotify_watch(watch);
 
-static struct file_system_type inotify_fs_type = {
-    .name           = "inotifyfs",
-    .get_sb         = inotify_get_sb,
-    .kill_sb        = kill_anon_super,
-};
+	return 0;
+}
+EXPORT_SYMBOL_GPL(inotify_rm_wd);
 
 /*
- * inotify_setup - Our initialization function.  Note that we cannnot return
- * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
- * must result in panic().
+ * inotify_setup - core initialization function
  */
 static int __init inotify_setup(void)
 {
-	int ret;
-
-	ret = register_filesystem(&inotify_fs_type);
-	if (unlikely(ret))
-		panic("inotify: register_filesystem returned %d!\n", ret);
-
-	inotify_mnt = kern_mount(&inotify_fs_type);
-	if (IS_ERR(inotify_mnt))
-		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
-
-	inotify_max_queued_events = 16384;
-	inotify_max_user_instances = 128;
-	inotify_max_user_watches = 8192;
-
 	atomic_set(&inotify_cookie, 0);
 
-	watch_cachep = kmem_cache_create("inotify_watch_cache",
-					 sizeof(struct inotify_watch),
-					 0, SLAB_PANIC, NULL, NULL);
-	event_cachep = kmem_cache_create("inotify_event_cache",
-					 sizeof(struct inotify_kernel_event),
-					 0, SLAB_PANIC, NULL, NULL);
-
 	return 0;
 }
 
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
new file mode 100644
index 000000000000..845dc79a4e9c
--- /dev/null
+++ b/fs/inotify_user.c
@@ -0,0 +1,717 @@
+/*
+ * fs/inotify_user.c - inotify support for userspace
+ *
+ * Authors:
+ *	John McCutchan	<ttb@tentacle.dhs.org>
+ *	Robert Love	<rml@novell.com>
+ *
+ * Copyright (C) 2005 John McCutchan
+ * Copyright 2006 Hewlett-Packard Development Company, L.P.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/inotify.h>
+#include <linux/syscalls.h>
+
+#include <asm/ioctls.h>
+
+static kmem_cache_t *watch_cachep __read_mostly;
+static kmem_cache_t *event_cachep __read_mostly;
+
+static struct vfsmount *inotify_mnt __read_mostly;
+
+/* these are configurable via /proc/sys/fs/inotify/ */
+int inotify_max_user_instances __read_mostly;
+int inotify_max_user_watches __read_mostly;
+int inotify_max_queued_events __read_mostly;
+
+/*
+ * Lock ordering:
+ *
+ * inotify_dev->up_mutex (ensures we don't re-add the same watch)
+ * 	inode->inotify_mutex (protects inode's watch list)
+ * 		inotify_handle->mutex (protects inotify_handle's watch list)
+ * 			inotify_dev->ev_mutex (protects device's event queue)
+ */
+
+/*
+ * Lifetimes of the main data structures:
+ *
+ * inotify_device: Lifetime is managed by reference count, from
+ * sys_inotify_init() until release.  Additional references can bump the count
+ * via get_inotify_dev() and drop the count via put_inotify_dev().
+ *
+ * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
+ * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
+ * first event, or to inotify_destroy().
+ */
+
+/*
+ * struct inotify_device - represents an inotify instance
+ *
+ * The event queue is protected by ev_mutex; up_mutex serializes watch updates.
+ */
+struct inotify_device {
+	wait_queue_head_t 	wq;		/* wait queue for i/o */
+	struct mutex		ev_mutex;	/* protects event queue */
+	struct mutex		up_mutex;	/* synchronizes watch updates */
+	struct list_head 	events;		/* list of queued events */
+	atomic_t		count;		/* reference count */
+	struct user_struct	*user;		/* user who opened this dev */
+	struct inotify_handle	*ih;		/* inotify handle */
+	unsigned int		queue_size;	/* size of the queue (bytes) */
+	unsigned int		event_count;	/* number of pending events */
+	unsigned int		max_events;	/* maximum number of events */
+};
+
+/*
+ * struct inotify_kernel_event - An inotify event, originating from a watch and
+ * queued for user-space.  A list of these is attached to each instance of the
+ * device.  In read(), this list is walked and all events that can fit in the
+ * buffer are returned.
+ *
+ * Protected by dev->ev_mutex of the device in which we are queued.
+ */
+struct inotify_kernel_event {
+	struct inotify_event	event;	/* the user-space event */
+	struct list_head        list;	/* entry in inotify_device's list */
+	char			*name;	/* filename, if any */
+};
+
+/*
+ * struct inotify_user_watch - our version of an inotify_watch, we add
+ * a reference to the associated inotify_device.
+ */
+struct inotify_user_watch {
+	struct inotify_device	*dev;	/* associated device */
+	struct inotify_watch	wdata;	/* inotify watch data */
+};
+
+#ifdef CONFIG_SYSCTL
+
+#include <linux/sysctl.h>
+
+static int zero;
+
+ctl_table inotify_table[] = {
+	{
+		.ctl_name	= INOTIFY_MAX_USER_INSTANCES,
+		.procname	= "max_user_instances",
+		.data		= &inotify_max_user_instances,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_USER_WATCHES,
+		.procname	= "max_user_watches",
+		.data		= &inotify_max_user_watches,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= INOTIFY_MAX_QUEUED_EVENTS,
+		.procname	= "max_queued_events",
+		.data		= &inotify_max_queued_events,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero
+	},
+	{ .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+
+static inline void get_inotify_dev(struct inotify_device *dev)
+{
+	atomic_inc(&dev->count);
+}
+
+static inline void put_inotify_dev(struct inotify_device *dev)
+{
+	if (atomic_dec_and_test(&dev->count)) {
+		atomic_dec(&dev->user->inotify_devs);
+		free_uid(dev->user);
+		kfree(dev);
+	}
+}
+
+/*
+ * free_inotify_user_watch - cleans up the watch and its references
+ */
+static void free_inotify_user_watch(struct inotify_watch *w)
+{
+	struct inotify_user_watch *watch;
+	struct inotify_device *dev;
+
+	watch = container_of(w, struct inotify_user_watch, wdata);
+	dev = watch->dev;
+
+	atomic_dec(&dev->user->inotify_watches);
+	put_inotify_dev(dev);
+	kmem_cache_free(watch_cachep, watch);
+}
+
+/*
+ * kernel_event - create a new kernel event with the given parameters
+ *
+ * This function can sleep.
+ */
+static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
+						  const char *name)
+{
+	struct inotify_kernel_event *kevent;
+
+	kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
+	if (unlikely(!kevent))
+		return NULL;
+
+	/* we hand this out to user-space, so zero it just in case */
+	memset(&kevent->event, 0, sizeof(struct inotify_event));
+
+	kevent->event.wd = wd;
+	kevent->event.mask = mask;
+	kevent->event.cookie = cookie;
+
+	INIT_LIST_HEAD(&kevent->list);
+
+	if (name) {
+		size_t len, rem, event_size = sizeof(struct inotify_event);
+
+		/*
+		 * We need to pad the filename so as to properly align an
+		 * array of inotify_event structures.  Because the structure is
+		 * small and the common case is a small filename, we just round
+		 * up to the next multiple of the structure's sizeof.  This is
+		 * simple and safe for all architectures.
+		 */
+		len = strlen(name) + 1;
+		rem = event_size - len;
+		if (len > event_size) {
+			rem = event_size - (len % event_size);
+			if (len % event_size == 0)
+				rem = 0;
+		}
+
+		kevent->name = kmalloc(len + rem, GFP_KERNEL);
+		if (unlikely(!kevent->name)) {
+			kmem_cache_free(event_cachep, kevent);
+			return NULL;
+		}
+		memcpy(kevent->name, name, len);
+		if (rem)
+			memset(kevent->name + len, 0, rem);
+		kevent->event.len = len + rem;
+	} else {
+		kevent->event.len = 0;
+		kevent->name = NULL;
+	}
+
+	return kevent;
+}
+
+/*
+ * inotify_dev_get_event - return the next event in the given dev's queue
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static inline struct inotify_kernel_event *
+inotify_dev_get_event(struct inotify_device *dev)
+{
+	return list_entry(dev->events.next, struct inotify_kernel_event, list);
+}
+
+/*
+ * inotify_dev_queue_event - event handler registered with core inotify, adds
+ * a new event to the given device
+ *
+ * Can sleep (calls kernel_event()).
+ */
+static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
+				    u32 cookie, const char *name)
+{
+	struct inotify_user_watch *watch;
+	struct inotify_device *dev;
+	struct inotify_kernel_event *kevent, *last;
+
+	watch = container_of(w, struct inotify_user_watch, wdata);
+	dev = watch->dev;
+
+	mutex_lock(&dev->ev_mutex);
+
+	/* we can safely put the watch as we don't reference it while
+	 * generating the event
+	 */
+	if (mask & IN_IGNORED || mask & IN_ONESHOT)
+		put_inotify_watch(w); /* final put */
+
+	/* coalescing: drop this event if it is a dupe of the previous */
+	last = inotify_dev_get_event(dev);
+	if (last && last->event.mask == mask && last->event.wd == wd &&
+			last->event.cookie == cookie) {
+		const char *lastname = last->name;
+
+		if (!name && !lastname)
+			goto out;
+		if (name && lastname && !strcmp(lastname, name))
+			goto out;
+	}
+
+	/* the queue overflowed and we already sent the Q_OVERFLOW event */
+	if (unlikely(dev->event_count > dev->max_events))
+		goto out;
+
+	/* if the queue overflows, we need to notify user space */
+	if (unlikely(dev->event_count == dev->max_events))
+		kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
+	else
+		kevent = kernel_event(wd, mask, cookie, name);
+
+	if (unlikely(!kevent))
+		goto out;
+
+	/* queue the event and wake up anyone waiting */
+	dev->event_count++;
+	dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
+	list_add_tail(&kevent->list, &dev->events);
+	wake_up_interruptible(&dev->wq);
+
+out:
+	mutex_unlock(&dev->ev_mutex);
+}
+
+/*
+ * remove_kevent - cleans up and ultimately frees the given kevent
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void remove_kevent(struct inotify_device *dev,
+			  struct inotify_kernel_event *kevent)
+{
+	list_del(&kevent->list);
+
+	dev->event_count--;
+	dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
+
+	kfree(kevent->name);
+	kmem_cache_free(event_cachep, kevent);
+}
+
+/*
+ * inotify_dev_event_dequeue - destroy an event on the given device
+ *
+ * Caller must hold dev->ev_mutex.
+ */
+static void inotify_dev_event_dequeue(struct inotify_device *dev)
+{
+	if (!list_empty(&dev->events)) {
+		struct inotify_kernel_event *kevent;
+		kevent = inotify_dev_get_event(dev);
+		remove_kevent(dev, kevent);
+	}
+}
+
+/*
+ * find_inode - resolve a user-given path to a specific inode and return a nd
+ */
+static int find_inode(const char __user *dirname, struct nameidata *nd,
+		      unsigned flags)
+{
+	int error;
+
+	error = __user_walk(dirname, flags, nd);
+	if (error)
+		return error;
+	/* you can only watch an inode if you have read permissions on it */
+	error = vfs_permission(nd, MAY_READ);
+	if (error)
+		path_release(nd);
+	return error;
+}
+
+/*
+ * create_watch - creates a watch on the given device.
+ *
+ * Callers must hold dev->up_mutex.
+ */
+static int create_watch(struct inotify_device *dev, struct inode *inode,
+			u32 mask)
+{
+	struct inotify_user_watch *watch;
+	int ret;
+
+	if (atomic_read(&dev->user->inotify_watches) >=
+			inotify_max_user_watches)
+		return -ENOSPC;
+
+	watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
+	if (unlikely(!watch))
+		return -ENOMEM;
+
+	/* save a reference to device and bump the count to make it official */
+	get_inotify_dev(dev);
+	watch->dev = dev;
+
+	atomic_inc(&dev->user->inotify_watches);
+
+	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
+	if (ret < 0)
+		free_inotify_user_watch(&watch->wdata);
+
+	return ret;
+}
+
+/* Device Interface */
+
+static unsigned int inotify_poll(struct file *file, poll_table *wait)
+{
+	struct inotify_device *dev = file->private_data;
+	int ret = 0;
+
+	poll_wait(file, &dev->wq, wait);
+	mutex_lock(&dev->ev_mutex);
+	if (!list_empty(&dev->events))
+		ret = POLLIN | POLLRDNORM;
+	mutex_unlock(&dev->ev_mutex);
+
+	return ret;
+}
+
+static ssize_t inotify_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *pos)
+{
+	size_t event_size = sizeof (struct inotify_event);
+	struct inotify_device *dev;
+	char __user *start;
+	int ret;
+	DEFINE_WAIT(wait);
+
+	start = buf;
+	dev = file->private_data;
+
+	while (1) {
+		int events;
+
+		prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
+
+		mutex_lock(&dev->ev_mutex);
+		events = !list_empty(&dev->events);
+		mutex_unlock(&dev->ev_mutex);
+		if (events) {
+			ret = 0;
+			break;
+		}
+
+		if (file->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		schedule();
+	}
+
+	finish_wait(&dev->wq, &wait);
+	if (ret)
+		return ret;
+
+	mutex_lock(&dev->ev_mutex);
+	while (1) {
+		struct inotify_kernel_event *kevent;
+
+		ret = buf - start;
+		if (list_empty(&dev->events))
+			break;
+
+		kevent = inotify_dev_get_event(dev);
+		if (event_size + kevent->event.len > count)
+			break;
+
+		if (copy_to_user(buf, &kevent->event, event_size)) {
+			ret = -EFAULT;
+			break;
+		}
+		buf += event_size;
+		count -= event_size;
+
+		if (kevent->name) {
+			if (copy_to_user(buf, kevent->name, kevent->event.len)){
+				ret = -EFAULT;
+				break;
+			}
+			buf += kevent->event.len;
+			count -= kevent->event.len;
+		}
+
+		remove_kevent(dev, kevent);
+	}
+	mutex_unlock(&dev->ev_mutex);
+
+	return ret;
+}
+
+static int inotify_release(struct inode *ignored, struct file *file)
+{
+	struct inotify_device *dev = file->private_data;
+
+	inotify_destroy(dev->ih);
+
+	/* destroy all of the events on this device */
+	mutex_lock(&dev->ev_mutex);
+	while (!list_empty(&dev->events))
+		inotify_dev_event_dequeue(dev);
+	mutex_unlock(&dev->ev_mutex);
+
+	/* free this device: the put matching the get in sys_inotify_init() */
+	put_inotify_dev(dev);
+
+	return 0;
+}
+
+static long inotify_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inotify_device *dev;
+	void __user *p;
+	int ret = -ENOTTY;
+
+	dev = file->private_data;
+	p = (void __user *) arg;
+
+	switch (cmd) {
+	case FIONREAD:
+		ret = put_user(dev->queue_size, (int __user *) p);
+		break;
+	}
+
+	return ret;
+}
+
+static const struct file_operations inotify_fops = {
+	.poll           = inotify_poll,
+	.read           = inotify_read,
+	.release        = inotify_release,
+	.unlocked_ioctl = inotify_ioctl,
+	.compat_ioctl	= inotify_ioctl,
+};
+
+static const struct inotify_operations inotify_user_ops = {
+	.handle_event	= inotify_dev_queue_event,
+	.destroy_watch	= free_inotify_user_watch,
+};
+
+asmlinkage long sys_inotify_init(void)
+{
+	struct inotify_device *dev;
+	struct inotify_handle *ih;
+	struct user_struct *user;
+	struct file *filp;
+	int fd, ret;
+
+	fd = get_unused_fd();
+	if (fd < 0)
+		return fd;
+
+	filp = get_empty_filp();
+	if (!filp) {
+		ret = -ENFILE;
+		goto out_put_fd;
+	}
+
+	user = get_uid(current->user);
+	if (unlikely(atomic_read(&user->inotify_devs) >=
+			inotify_max_user_instances)) {
+		ret = -EMFILE;
+		goto out_free_uid;
+	}
+
+	dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
+	if (unlikely(!dev)) {
+		ret = -ENOMEM;
+		goto out_free_uid;
+	}
+
+	ih = inotify_init(&inotify_user_ops);
+	if (unlikely(IS_ERR(ih))) {
+		ret = PTR_ERR(ih);
+		goto out_free_dev;
+	}
+	dev->ih = ih;
+
+	filp->f_op = &inotify_fops;
+	filp->f_vfsmnt = mntget(inotify_mnt);
+	filp->f_dentry = dget(inotify_mnt->mnt_root);
+	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
+	filp->f_mode = FMODE_READ;
+	filp->f_flags = O_RDONLY;
+	filp->private_data = dev;
+
+	INIT_LIST_HEAD(&dev->events);
+	init_waitqueue_head(&dev->wq);
+	mutex_init(&dev->ev_mutex);
+	mutex_init(&dev->up_mutex);
+	dev->event_count = 0;
+	dev->queue_size = 0;
+	dev->max_events = inotify_max_queued_events;
+	dev->user = user;
+	atomic_set(&dev->count, 0);
+
+	get_inotify_dev(dev);
+	atomic_inc(&user->inotify_devs);
+	fd_install(fd, filp);
+
+	return fd;
+out_free_dev:
+	kfree(dev);
+out_free_uid:
+	free_uid(user);
+	put_filp(filp);
+out_put_fd:
+	put_unused_fd(fd);
+	return ret;
+}
+
+asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
+{
+	struct inode *inode;
+	struct inotify_device *dev;
+	struct nameidata nd;
+	struct file *filp;
+	int ret, fput_needed;
+	unsigned flags = 0;
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an inotify instance */
+	if (unlikely(filp->f_op != &inotify_fops)) {
+		ret = -EINVAL;
+		goto fput_and_out;
+	}
+
+	if (!(mask & IN_DONT_FOLLOW))
+		flags |= LOOKUP_FOLLOW;
+	if (mask & IN_ONLYDIR)
+		flags |= LOOKUP_DIRECTORY;
+
+	ret = find_inode(path, &nd, flags);
+	if (unlikely(ret))
+		goto fput_and_out;
+
+	/* inode held in place by reference to nd; dev by fget on fd */
+	inode = nd.dentry->d_inode;
+	dev = filp->private_data;
+
+	mutex_lock(&dev->up_mutex);
+	ret = inotify_find_update_watch(dev->ih, inode, mask);
+	if (ret == -ENOENT)
+		ret = create_watch(dev, inode, mask);
+	mutex_unlock(&dev->up_mutex);
+
+	path_release(&nd);
+fput_and_out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
+{
+	struct file *filp;
+	struct inotify_device *dev;
+	int ret, fput_needed;
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an inotify instance */
+	if (unlikely(filp->f_op != &inotify_fops)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	dev = filp->private_data;
+
+	/* we free our watch data when we get IN_IGNORED */
+	ret = inotify_rm_wd(dev->ih, wd);
+
+out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+static struct super_block *
+inotify_get_sb(struct file_system_type *fs_type, int flags,
+	       const char *dev_name, void *data)
+{
+    return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
+}
+
+static struct file_system_type inotify_fs_type = {
+    .name           = "inotifyfs",
+    .get_sb         = inotify_get_sb,
+    .kill_sb        = kill_anon_super,
+};
+
+/*
+ * inotify_user_setup - Our initialization function.  Note that we cannot return
+ * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
+ * must result in panic().
+ */
+static int __init inotify_user_setup(void)
+{
+	int ret;
+
+	ret = register_filesystem(&inotify_fs_type);
+	if (unlikely(ret))
+		panic("inotify: register_filesystem returned %d!\n", ret);
+
+	inotify_mnt = kern_mount(&inotify_fs_type);
+	if (IS_ERR(inotify_mnt))
+		panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
+
+	inotify_max_queued_events = 16384;
+	inotify_max_user_instances = 128;
+	inotify_max_user_watches = 8192;
+
+	watch_cachep = kmem_cache_create("inotify_watch_cache",
+					 sizeof(struct inotify_user_watch),
+					 0, SLAB_PANIC, NULL, NULL);
+	event_cachep = kmem_cache_create("inotify_event_cache",
+					 sizeof(struct inotify_kernel_event),
+					 0, SLAB_PANIC, NULL, NULL);
+
+	return 0;
+}
+
+module_init(inotify_user_setup);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 71aa1553ef38..68b6e0127de4 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -68,8 +68,37 @@ struct inotify_event {
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
+/*
+ * struct inotify_watch - represents a watch request on a specific inode
+ *
+ * h_list is protected by ih->mutex of the associated inotify_handle.
+ * i_list, mask are protected by inode->inotify_mutex of the associated inode.
+ * ih, inode, and wd are never written to once the watch is created.
+ *
+ * Callers must use the established inotify interfaces to access inotify_watch
+ * contents.  The content of this structure is private to the inotify
+ * implementation.
+ */
+struct inotify_watch {
+	struct list_head	h_list;	/* entry in inotify_handle's list */
+	struct list_head	i_list;	/* entry in inode's list */
+	atomic_t		count;	/* reference count */
+	struct inotify_handle	*ih;	/* associated inotify handle */
+	struct inode		*inode;	/* associated inode */
+	__s32			wd;	/* watch descriptor */
+	__u32			mask;	/* event mask for this watch */
+};
+
+struct inotify_operations {
+	void (*handle_event)(struct inotify_watch *, u32, u32, u32,
+			     const char *);
+	void (*destroy_watch)(struct inotify_watch *);
+};
+
 #ifdef CONFIG_INOTIFY
 
+/* Kernel API for producing events */
+
 extern void inotify_d_instantiate(struct dentry *, struct inode *);
 extern void inotify_d_move(struct dentry *);
 extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
@@ -80,6 +109,18 @@ extern void inotify_unmount_inodes(struct list_head *);
 extern void inotify_inode_is_dead(struct inode *);
 extern u32 inotify_get_cookie(void);
 
+/* Kernel Consumer API */
+
+extern struct inotify_handle *inotify_init(const struct inotify_operations *);
+extern void inotify_destroy(struct inotify_handle *);
+extern __s32 inotify_find_update_watch(struct inotify_handle *, struct inode *,
+				       u32);
+extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
+			       struct inode *, __u32);
+extern int inotify_rm_wd(struct inotify_handle *, __u32);
+extern void get_inotify_watch(struct inotify_watch *);
+extern void put_inotify_watch(struct inotify_watch *);
+
 #else
 
 static inline void inotify_d_instantiate(struct dentry *dentry,
@@ -116,6 +157,41 @@ static inline u32 inotify_get_cookie(void)
 	return 0;
 }
 
+static inline struct inotify_handle *inotify_init(const struct inotify_operations *ops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void inotify_destroy(struct inotify_handle *ih)
+{
+}
+
+static inline __s32 inotify_find_update_watch(struct inotify_handle *ih,
+					      struct inode *inode, u32 mask)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline __s32 inotify_add_watch(struct inotify_handle *ih,
+				      struct inotify_watch *watch,
+				      struct inode *inode, __u32 mask)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void get_inotify_watch(struct inotify_watch *watch)
+{
+}
+
+static inline void put_inotify_watch(struct inotify_watch *watch)
+{
+}
+
 #endif	/* CONFIG_INOTIFY */
 
 #endif	/* __KERNEL __ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29b7d4f87d20..864e5a70ff65 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -494,7 +494,7 @@ struct user_struct {
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t files;		/* How many open files does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 	atomic_t inotify_watches; /* How many inotify watches does this user have? */
 	atomic_t inotify_devs;	/* How many inotify devs does this user have opened? */
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e82726faeeff..0d656e61621d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -150,7 +150,7 @@ extern ctl_table random_table[];
 #ifdef CONFIG_UNIX98_PTYS
 extern ctl_table pty_table[];
 #endif
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 extern ctl_table inotify_table[];
 #endif
 
@@ -1028,7 +1028,7 @@ static ctl_table fs_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 	{
 		.ctl_name	= FS_INOTIFY,
 		.procname	= "inotify",
diff --git a/kernel/user.c b/kernel/user.c
index 2116642f42c6..4b1eb745afa1 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -140,7 +140,7 @@ struct user_struct * alloc_uid(uid_t uid)
 		atomic_set(&new->processes, 0);
 		atomic_set(&new->files, 0);
 		atomic_set(&new->sigpending, 0);
-#ifdef CONFIG_INOTIFY
+#ifdef CONFIG_INOTIFY_USER
 		atomic_set(&new->inotify_watches, 0);
 		atomic_set(&new->inotify_devs, 0);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 7c29772288b7026504cfe75bfd90d40fbd1574bf Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 1 Jun 2006 13:11:01 -0700
Subject: [PATCH] inotify (2/5): add name's inode to event handler

When an inotify event includes a dentry name, also include the inode
associated with that name.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inotify.c             | 13 ++++++++-----
 fs/inotify_user.c        |  3 ++-
 include/linux/fsnotify.h | 29 ++++++++++++++++-------------
 include/linux/inotify.h  |  7 ++++---
 4 files changed, 30 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/inotify.c b/fs/inotify.c
index a1bedf3975ca..f25c21801fdc 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -232,7 +232,7 @@ static void remove_watch_no_event(struct inotify_watch *watch,
 static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih)
 {
 	remove_watch_no_event(watch, ih);
-	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL);
+	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
 }
 
 /* Kernel API for producing events */
@@ -275,9 +275,10 @@ void inotify_d_move(struct dentry *entry)
  * @mask: event mask describing this event
  * @cookie: cookie for synchronization, or zero
  * @name: filename, if any
+ * @n_inode: inode associated with name
  */
 void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
-			       const char *name)
+			       const char *name, struct inode *n_inode)
 {
 	struct inotify_watch *watch, *next;
 
@@ -292,7 +293,8 @@ void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
 			mutex_lock(&ih->mutex);
 			if (watch_mask & IN_ONESHOT)
 				remove_watch_no_event(watch, ih);
-			ih->in_ops->handle_event(watch, watch->wd, mask, cookie, name);
+			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
+						 name, n_inode);
 			mutex_unlock(&ih->mutex);
 		}
 	}
@@ -323,7 +325,8 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
 	if (inotify_inode_watched(inode)) {
 		dget(parent);
 		spin_unlock(&dentry->d_lock);
-		inotify_inode_queue_event(inode, mask, cookie, name);
+		inotify_inode_queue_event(inode, mask, cookie, name,
+					  dentry->d_inode);
 		dput(parent);
 	} else
 		spin_unlock(&dentry->d_lock);
@@ -407,7 +410,7 @@ void inotify_unmount_inodes(struct list_head *list)
 			struct inotify_handle *ih= watch->ih;
 			mutex_lock(&ih->mutex);
 			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
-						 NULL);
+						 NULL, NULL);
 			remove_watch(watch, ih);
 			mutex_unlock(&ih->mutex);
 		}
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 845dc79a4e9c..8b83c7190067 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -253,7 +253,8 @@ inotify_dev_get_event(struct inotify_device *dev)
  * Can sleep (calls kernel_event()).
  */
 static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
-				    u32 cookie, const char *name)
+				    u32 cookie, const char *name,
+				    struct inode *ignored)
 {
 	struct inotify_user_watch *watch;
 	struct inotify_device *dev;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 11438eff4d44..a9d30442448f 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -54,16 +54,18 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 
 	if (isdir)
 		isdir = IN_ISDIR;
-	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name);
-	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name);
+	inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name,
+				  source);
+	inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name,
+				  source);
 
 	if (target) {
-		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL);
+		inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
 		inotify_inode_is_dead(target);
 	}
 
 	if (source) {
-		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL);
+		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
 	}
 	audit_inode_child(old_name, source, old_dir->i_ino);
 	audit_inode_child(new_name, target, new_dir->i_ino);
@@ -85,7 +87,7 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
  */
 static inline void fsnotify_inoderemove(struct inode *inode)
 {
-	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL);
+	inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
 	inotify_inode_is_dead(inode);
 }
 
@@ -95,7 +97,8 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
 	inode_dir_notify(inode, DN_CREATE);
-	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name);
+	inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
+				  dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino);
 }
 
@@ -106,7 +109,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
 	inode_dir_notify(inode, DN_CREATE);
 	inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, 
-				  dentry->d_name.name);
+				  dentry->d_name.name, dentry->d_inode);
 	audit_inode_child(dentry->d_name.name, dentry->d_inode, inode->i_ino);
 }
 
@@ -123,7 +126,7 @@ static inline void fsnotify_access(struct dentry *dentry)
 
 	dnotify_parent(dentry, DN_ACCESS);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
-	inotify_inode_queue_event(inode, mask, 0, NULL);
+	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 }
 
 /*
@@ -139,7 +142,7 @@ static inline void fsnotify_modify(struct dentry *dentry)
 
 	dnotify_parent(dentry, DN_MODIFY);
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
-	inotify_inode_queue_event(inode, mask, 0, NULL);
+	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 }
 
 /*
@@ -154,7 +157,7 @@ static inline void fsnotify_open(struct dentry *dentry)
 		mask |= IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
-	inotify_inode_queue_event(inode, mask, 0, NULL);	
+	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 }
 
 /*
@@ -172,7 +175,7 @@ static inline void fsnotify_close(struct file *file)
 		mask |= IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, name);
-	inotify_inode_queue_event(inode, mask, 0, NULL);
+	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 }
 
 /*
@@ -187,7 +190,7 @@ static inline void fsnotify_xattr(struct dentry *dentry)
 		mask |= IN_ISDIR;
 
 	inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
-	inotify_inode_queue_event(inode, mask, 0, NULL);
+	inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
 }
 
 /*
@@ -234,7 +237,7 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 	if (in_mask) {
 		if (S_ISDIR(inode->i_mode))
 			in_mask |= IN_ISDIR;
-		inotify_inode_queue_event(inode, in_mask, 0, NULL);
+		inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL);
 		inotify_dentry_parent_queue_event(dentry, in_mask, 0,
 						  dentry->d_name.name);
 	}
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 68b6e0127de4..e7899e7d83ad 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -91,7 +91,7 @@ struct inotify_watch {
 
 struct inotify_operations {
 	void (*handle_event)(struct inotify_watch *, u32, u32, u32,
-			     const char *);
+			     const char *, struct inode *);
 	void (*destroy_watch)(struct inotify_watch *);
 };
 
@@ -102,7 +102,7 @@ struct inotify_operations {
 extern void inotify_d_instantiate(struct dentry *, struct inode *);
 extern void inotify_d_move(struct dentry *);
 extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
-				      const char *);
+				      const char *, struct inode *);
 extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
 					      const char *);
 extern void inotify_unmount_inodes(struct list_head *);
@@ -134,7 +134,8 @@ static inline void inotify_d_move(struct dentry *dentry)
 
 static inline void inotify_inode_queue_event(struct inode *inode,
 					     __u32 mask, __u32 cookie,
-					     const char *filename)
+					     const char *filename,
+					     struct inode *n_inode)
 {
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a9dc971d3fdb857a2bcd6d53238125a2cd31d5f4 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 1 Jun 2006 13:11:03 -0700
Subject: [PATCH] inotify (3/5): add interfaces to kernel API

Add inotify_init_watch() so caller can use inotify_watch refcounts
before calling inotify_add_watch().

Add inotify_find_watch() to find an existing watch for an (ih,inode)
pair.  This is similar to inotify_find_update_watch(), but does not
update the watch's mask if one is found.

Add inotify_rm_watch() to remove a watch via the watch pointer instead
of the watch descriptor.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inotify.c            | 64 ++++++++++++++++++++++++++++++++++++++++++++-----
 fs/inotify_user.c       |  1 +
 include/linux/inotify.h | 20 ++++++++++++++++
 3 files changed, 79 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/inotify.c b/fs/inotify.c
index f25c21801fdc..8477c4fbecb4 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -467,6 +467,19 @@ struct inotify_handle *inotify_init(const struct inotify_operations *ops)
 }
 EXPORT_SYMBOL_GPL(inotify_init);
 
+/**
+ * inotify_init_watch - init a watch's list heads and take the first reference
+ * @watch: watch to set up; must be done before inotify_add_watch() is called
+ */
+void inotify_init_watch(struct inotify_watch *watch)
+{
+	INIT_LIST_HEAD(&watch->h_list);
+	INIT_LIST_HEAD(&watch->i_list); /* link used on inode->inotify_watches */
+	atomic_set(&watch->count, 0); /* start from zero; the get below is first */
+	get_inotify_watch(watch); /* initial get */
+}
+EXPORT_SYMBOL_GPL(inotify_init_watch);
+
 /**
  * inotify_destroy - clean up and destroy an inotify instance
  * @ih: inotify handle
@@ -514,6 +527,37 @@ void inotify_destroy(struct inotify_handle *ih)
 }
 EXPORT_SYMBOL_GPL(inotify_destroy);
 
+/**
+ * inotify_find_watch - find an existing watch for an (ih,inode) pair
+ * @ih: inotify handle
+ * @inode: inode to look up
+ * @watchp: set to the found watch (reference held); untouched if none is found
+ *
+ * Returns the watch's wd, or -ENOENT.  Caller must pin inode (via nameidata).
+ */
+s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
+		       struct inotify_watch **watchp)
+{
+	struct inotify_watch *old;
+	int ret = -ENOENT; /* result when no watch matches */
+
+	mutex_lock(&inode->inotify_mutex); /* inode mutex first, then handle */
+	mutex_lock(&ih->mutex);
+
+	old = inode_find_handle(inode, ih);
+	if (unlikely(old)) {
+		get_inotify_watch(old); /* caller must put watch */
+		*watchp = old;
+		ret = old->wd;
+	}
+
+	mutex_unlock(&ih->mutex); /* release in reverse order of acquisition */
+	mutex_unlock(&inode->inotify_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(inotify_find_watch);
+
 /**
  * inotify_find_update_watch - find and update the mask of an existing watch
  * @ih: inotify handle
@@ -593,10 +637,6 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
 		goto out;
 	ret = watch->wd;
 
-	atomic_set(&watch->count, 0);
-	INIT_LIST_HEAD(&watch->h_list);
-	INIT_LIST_HEAD(&watch->i_list);
-
 	/* save a reference to handle and bump the count to make it official */
 	get_inotify_handle(ih);
 	watch->ih = ih;
@@ -607,8 +647,6 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
 	 */
 	watch->inode = igrab(inode);
 
-	get_inotify_watch(watch); /* initial get */
-
 	if (!inotify_inode_watched(inode))
 		set_dentry_child_flags(inode, 1);
 
@@ -659,6 +697,20 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 }
 EXPORT_SYMBOL_GPL(inotify_rm_wd);
 
+/**
+ * inotify_rm_watch - remove a watch from an inotify instance
+ * @ih: inotify handle
+ * @watch: watch to remove; its wd is what gets passed to inotify_rm_wd()
+ *
+ * Wrapper around inotify_rm_wd(); returns that call's result.  Can sleep.
+ */
+int inotify_rm_watch(struct inotify_handle *ih,
+		     struct inotify_watch *watch)
+{
+	return inotify_rm_wd(ih, watch->wd);
+}
+EXPORT_SYMBOL_GPL(inotify_rm_watch);
+
 /*
  * inotify_setup - core initialization function
  */
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 8b83c7190067..9e9931e2badd 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -380,6 +380,7 @@ static int create_watch(struct inotify_device *dev, struct inode *inode,
 
 	atomic_inc(&dev->user->inotify_watches);
 
+	inotify_init_watch(&watch->wdata);
 	ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
 	if (ret < 0)
 		free_inotify_user_watch(&watch->wdata);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index e7899e7d83ad..e7e7fb7fc778 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -112,11 +112,15 @@ extern u32 inotify_get_cookie(void);
 /* Kernel Consumer API */
 
 extern struct inotify_handle *inotify_init(const struct inotify_operations *);
+extern void inotify_init_watch(struct inotify_watch *);
 extern void inotify_destroy(struct inotify_handle *);
+extern __s32 inotify_find_watch(struct inotify_handle *, struct inode *,
+				struct inotify_watch **);
 extern __s32 inotify_find_update_watch(struct inotify_handle *, struct inode *,
 				       u32);
 extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
 			       struct inode *, __u32);
+extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *);
 extern int inotify_rm_wd(struct inotify_handle *, __u32);
 extern void get_inotify_watch(struct inotify_watch *);
 extern void put_inotify_watch(struct inotify_watch *);
@@ -163,10 +167,20 @@ static inline struct inotify_handle *inotify_init(const struct inotify_operation
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void inotify_init_watch(struct inotify_watch *watch)
+{
+}
+
 static inline void inotify_destroy(struct inotify_handle *ih)
 {
 }
 
+static inline __s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
+				       struct inotify_watch **watchp)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline __s32 inotify_find_update_watch(struct inotify_handle *ih,
 					      struct inode *inode, u32 mask)
 {
@@ -180,6 +194,12 @@ static inline __s32 inotify_add_watch(struct inotify_handle *ih,
 	return -EOPNOTSUPP;
 }
 
+static inline int inotify_rm_watch(struct inotify_handle *ih,
+				   struct inotify_watch *watch)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From 3ca10067f7f4bfa62a1b0edc84f590261fa02d75 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 1 Jun 2006 13:11:05 -0700
Subject: [PATCH] inotify (4/5): allow watch removal from event handler

Allow callers to remove watches from their event handler via
inotify_remove_watch_locked().  This functionality can be used to
achieve IN_ONESHOT-like functionality for a subset of events in the
mask.

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Acked-by: Robert Love <rml@novell.com>
Acked-by: John McCutchan <john@johnmccutchan.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inotify.c            | 23 ++++++++++++++---------
 include/linux/inotify.h |  7 +++++++
 2 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/inotify.c b/fs/inotify.c
index 8477c4fbecb4..723836a1f718 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -207,7 +207,7 @@ static struct inotify_watch *inode_find_handle(struct inode *inode,
 }
 
 /*
- * remove_watch_no_event - remove_watch() without the IN_IGNORED event.
+ * remove_watch_no_event - remove watch without the IN_IGNORED event.
  *
  * Callers must hold both inode->inotify_mutex and ih->mutex.
  */
@@ -223,17 +223,22 @@ static void remove_watch_no_event(struct inotify_watch *watch,
 	idr_remove(&ih->idr, watch->wd);
 }
 
-/*
- * remove_watch - Remove a watch from both the handle and the inode.  Sends
- * the IN_IGNORED event signifying that the inode is no longer watched.
+/**
+ * inotify_remove_watch_locked - Remove a watch from both the handle and the
+ * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
+ * watched.  May be invoked from a caller's event handler.
+ * @ih: inotify handle associated with watch
+ * @watch: watch to remove
  *
  * Callers must hold both inode->inotify_mutex and ih->mutex.
  */
-static void remove_watch(struct inotify_watch *watch, struct inotify_handle *ih)
+void inotify_remove_watch_locked(struct inotify_handle *ih,
+				 struct inotify_watch *watch)
 {
 	remove_watch_no_event(watch, ih);
 	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
 }
+EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
 
 /* Kernel API for producing events */
 
@@ -378,7 +383,7 @@ void inotify_unmount_inodes(struct list_head *list)
 
 		need_iput_tmp = need_iput;
 		need_iput = NULL;
-		/* In case the remove_watch() drops a reference. */
+		/* In case inotify_remove_watch_locked() drops a reference. */
 		if (inode != need_iput_tmp)
 			__iget(inode);
 		else
@@ -411,7 +416,7 @@ void inotify_unmount_inodes(struct list_head *list)
 			mutex_lock(&ih->mutex);
 			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
 						 NULL, NULL);
-			remove_watch(watch, ih);
+			inotify_remove_watch_locked(ih, watch);
 			mutex_unlock(&ih->mutex);
 		}
 		mutex_unlock(&inode->inotify_mutex);
@@ -434,7 +439,7 @@ void inotify_inode_is_dead(struct inode *inode)
 	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
 		struct inotify_handle *ih = watch->ih;
 		mutex_lock(&ih->mutex);
-		remove_watch(watch, ih);
+		inotify_remove_watch_locked(ih, watch);
 		mutex_unlock(&ih->mutex);
 	}
 	mutex_unlock(&inode->inotify_mutex);
@@ -687,7 +692,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 
 	/* make sure that we did not race */
 	if (likely(idr_find(&ih->idr, wd) == watch))
-		remove_watch(watch, ih);
+		inotify_remove_watch_locked(ih, watch);
 
 	mutex_unlock(&ih->mutex);
 	mutex_unlock(&inode->inotify_mutex);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index e7e7fb7fc778..d4f48c6402e6 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -122,6 +122,8 @@ extern __s32 inotify_add_watch(struct inotify_handle *, struct inotify_watch *,
 			       struct inode *, __u32);
 extern int inotify_rm_watch(struct inotify_handle *, struct inotify_watch *);
 extern int inotify_rm_wd(struct inotify_handle *, __u32);
+extern void inotify_remove_watch_locked(struct inotify_handle *,
+					struct inotify_watch *);
 extern void get_inotify_watch(struct inotify_watch *);
 extern void put_inotify_watch(struct inotify_watch *);
 
@@ -205,6 +207,11 @@ static inline int inotify_rm_wd(struct inotify_handle *ih, __u32 wd)
 	return -EOPNOTSUPP;
 }
 
+static inline void inotify_remove_watch_locked(struct inotify_handle *ih,
+					       struct inotify_watch *watch)
+{
+}
+
 static inline void get_inotify_watch(struct inotify_watch *watch)
 {
 }
-- 
cgit v1.2.3-59-g8ed1b


From bc0f3b8ebba611291fdaa2864dbffd2d29336c64 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 22 May 2006 01:36:34 -0400
Subject: [PATCH] audit_panic() is audit-internal

... no need to provide a stub; note that extern is already gone from
include/linux/audit.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b74c148f14e3..e65399bf2710 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -384,7 +384,6 @@ extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 #define audit_log_hex(a,b,l) do { ; } while (0)
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
 #define audit_log_d_path(b,p,d,v) do { ; } while (0)
-#define audit_panic(m) do { ; } while (0)
 #endif
 #endif
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 473ae30bc7b1dda5c5791c773f95e9424ddfead9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 26 Apr 2006 14:04:08 -0400
Subject: [PATCH] execve argument logging

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c             |  6 ++++++
 include/linux/audit.h |  6 +++++-
 kernel/audit.c        |  8 +++++---
 kernel/auditsc.c      | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 67 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 3a79d97ac234..d07858c0b7c4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -49,6 +49,7 @@
 #include <linux/rmap.h>
 #include <linux/acct.h>
 #include <linux/cn_proc.h>
+#include <linux/audit.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1085,6 +1086,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 	/* kernel module loader fixup */
 	/* so we don't try to load run modprobe in kernel space. */
 	set_fs(USER_DS);
+
+	retval = audit_bprm(bprm);
+	if (retval)
+		return retval;
+
 	retval = -ENOENT;
 	for (try=0; try<2; try++) {
 		read_lock(&binfmt_lock);
diff --git a/include/linux/audit.h b/include/linux/audit.h
index e65399bf2710..1a221b65f7b7 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -83,6 +83,7 @@
 #define AUDIT_CONFIG_CHANGE	1305	/* Audit system configuration change */
 #define AUDIT_SOCKADDR		1306	/* sockaddr copied as syscall arg */
 #define AUDIT_CWD		1307	/* Current working directory */
+#define AUDIT_EXECVE		1309	/* execve arguments */
 #define AUDIT_IPC_SET_PERM	1311	/* IPC new permissions record type */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
@@ -283,6 +284,7 @@ struct audit_buffer;
 struct audit_context;
 struct inode;
 struct netlink_skb_parms;
+struct linux_binprm;
 
 #define AUDITSC_INVALID 0
 #define AUDITSC_SUCCESS 1
@@ -322,6 +324,7 @@ extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp);
+extern int audit_bprm(struct linux_binprm *bprm);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
@@ -342,6 +345,7 @@ extern int audit_set_macxattr(const char *name);
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_obj(i) ({ 0; })
 #define audit_ipc_set_perm(q,u,g,m,i) ({ 0; })
+#define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_avc_path(dentry, mnt) ({ 0; })
@@ -364,7 +368,7 @@ extern void		    audit_log_end(struct audit_buffer *ab);
 extern void		    audit_log_hex(struct audit_buffer *ab,
 					  const unsigned char *buf,
 					  size_t len);
-extern void		    audit_log_untrustedstring(struct audit_buffer *ab,
+extern const char *	    audit_log_untrustedstring(struct audit_buffer *ab,
 						      const char *string);
 extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const char *prefix,
diff --git a/kernel/audit.c b/kernel/audit.c
index bf74bf02aa4b..d09f131b111a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1026,18 +1026,20 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
  * or a space. Unescaped strings will start and end with a double quote mark.
  * Strings that are escaped are printed in hex (2 digits per char).
  */
-void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 {
 	const unsigned char *p = string;
+	size_t len = strlen(string);
 
 	while (*p) {
 		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
-			audit_log_hex(ab, string, strlen(string));
-			return;
+			audit_log_hex(ab, string, len);
+			return string + len + 1;
 		}
 		p++;
 	}
 	audit_log_format(ab, "\"%s\"", string);
+	return p + 1;
 }
 
 /* This is a helper-function to print the escaped d_path */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1c03a4ed1b27..114f921979ec 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -59,6 +59,7 @@
 #include <linux/list.h>
 #include <linux/tty.h>
 #include <linux/selinux.h>
+#include <linux/binfmts.h>
 
 #include "audit.h"
 
@@ -110,6 +111,13 @@ struct audit_aux_data_ipcctl {
 	u32			osid;
 };
 
+struct audit_aux_data_execve { /* aux record filled in by audit_bprm() */
+	struct audit_aux_data	d; /* d.type is set to AUDIT_EXECVE */
+	int argc; /* copied from bprm->argc */
+	int envc; /* copied from bprm->envc */
+	char mem[0]; /* bytes copied from the bprm pages, starting at bprm->p */
+};
+
 struct audit_aux_data_socketcall {
 	struct audit_aux_data	d;
 	int			nargs;
@@ -667,6 +675,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				kfree(ctx);
 			}
 			break; }
+		case AUDIT_EXECVE: {
+			struct audit_aux_data_execve *axi = (void *)aux;
+			int i;
+			const char *p;
+			for (i = 0, p = axi->mem; i < axi->argc; i++) {
+				audit_log_format(ab, "a%d=", i);
+				p = audit_log_untrustedstring(ab, p);
+				audit_log_format(ab, "\n");
+			}
+			break; }
 
 		case AUDIT_SOCKETCALL: {
 			int i;
@@ -1231,6 +1249,39 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
 	return 0;
 }
 
+int audit_bprm(struct linux_binprm *bprm) /* stash execve args as an aux record */
+{
+	struct audit_aux_data_execve *ax;
+	struct audit_context *context = current->audit_context;
+	unsigned long p, next;
+	void *to;
+
+	if (likely(!audit_enabled || !context)) /* nothing to record into */
+		return 0;
+
+	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
+				GFP_KERNEL); /* header + bytes from bprm->p to the loop bound */
+	if (!ax)
+		return -ENOMEM; /* search_binary_handler() returns this to its caller */
+
+	ax->argc = bprm->argc;
+	ax->envc = bprm->envc;
+	for (p = bprm->p, to = ax->mem; p < MAX_ARG_PAGES*PAGE_SIZE; p = next) {
+		struct page *page = bprm->page[p / PAGE_SIZE];
+		void *kaddr = kmap(page);
+		next = (p + PAGE_SIZE) & ~(PAGE_SIZE - 1); /* start of the next page */
+		memcpy(to, kaddr + (p & (PAGE_SIZE - 1)), next - p); /* rest of this page */
+		to += next - p;
+		kunmap(page);
+	}
+
+	ax->d.type = AUDIT_EXECVE;
+	ax->d.next = context->aux; /* push onto the head of the context's aux list */
+	context->aux = (void *)ax;
+	return 0;
+}
+
+
 /**
  * audit_socketcall - record audit data for sys_socketcall
  * @nargs: number of args
-- 
cgit v1.2.3-59-g8ed1b


From e1396065e0489f98b35021b97907ab4edbfb24e1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 25 May 2006 10:19:47 -0400
Subject: [PATCH] collect sid of those who send signals to auditd

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  3 +--
 kernel/audit.c        | 31 ++++++++++++++++++++-----------
 kernel/audit.h        | 11 +++++++++++
 kernel/auditsc.c      | 23 ++++++++++++-----------
 kernel/signal.c       |  2 +-
 5 files changed, 45 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1a221b65f7b7..1057e90bd3e3 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -278,6 +278,7 @@ struct audit_rule {		/* for AUDIT_LIST, AUDIT_ADD, and AUDIT_DEL */
 struct audit_sig_info {
 	uid_t		uid;
 	pid_t		pid;
+	char		ctx[0];
 };
 
 struct audit_buffer;
@@ -328,7 +329,6 @@ extern int audit_bprm(struct linux_binprm *bprm);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
-extern void audit_signal_info(int sig, struct task_struct *t);
 extern int audit_set_macxattr(const char *name);
 #else
 #define audit_alloc(t) ({ 0; })
@@ -349,7 +349,6 @@ extern int audit_set_macxattr(const char *name);
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_avc_path(dentry, mnt) ({ 0; })
-#define audit_signal_info(s,t) do { ; } while (0)
 #define audit_set_macxattr(n) do { ; } while (0)
 #endif
 
diff --git a/kernel/audit.c b/kernel/audit.c
index d09f131b111a..bb20922d08cc 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -89,6 +89,7 @@ static int	audit_backlog_wait_overflow = 0;
 /* The identity of the user shutting down the audit system. */
 uid_t		audit_sig_uid = -1;
 pid_t		audit_sig_pid = -1;
+u32		audit_sig_sid = 0;
 
 /* Records can be lost in several ways:
    0) [suppressed in audit_alloc]
@@ -479,7 +480,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
 	uid_t			loginuid; /* loginuid of sender */
-	struct audit_sig_info   sig_data;
+	struct audit_sig_info   *sig_data;
+	char			*ctx;
+	u32			len;
 
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
@@ -531,12 +534,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old   = audit_pid;
 			if (sid) {
-				char *ctx = NULL;
-				u32 len;
-				int rc;
-				if ((rc = selinux_ctxid_to_string(
+				if ((err = selinux_ctxid_to_string(
 						sid, &ctx, &len)))
-					return rc;
+					return err;
 				else
 					audit_log(NULL, GFP_KERNEL,
 						AUDIT_CONFIG_CHANGE,
@@ -572,8 +572,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 						 "user pid=%d uid=%u auid=%u",
 						 pid, uid, loginuid);
 				if (sid) {
-					char *ctx = NULL;
-					u32 len;
 					if (selinux_ctxid_to_string(
 							sid, &ctx, &len)) {
 						audit_log_format(ab, 
@@ -612,10 +610,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 					   loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
-		sig_data.uid = audit_sig_uid;
-		sig_data.pid = audit_sig_pid;
+		err = selinux_ctxid_to_string(audit_sig_sid, &ctx, &len);
+		if (err)
+			return err;
+		sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
+		if (!sig_data) {
+			kfree(ctx);
+			return -ENOMEM;
+		}
+		sig_data->uid = audit_sig_uid;
+		sig_data->pid = audit_sig_pid;
+		memcpy(sig_data->ctx, ctx, len);
+		kfree(ctx);
 		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, 
-				0, 0, &sig_data, sizeof(sig_data));
+				0, 0, sig_data, sizeof(*sig_data) + len);
+		kfree(sig_data);
 		break;
 	default:
 		err = -EINVAL;
diff --git a/kernel/audit.h b/kernel/audit.h
index 8948fc1e9e54..52cb1e31d522 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -101,3 +101,14 @@ struct audit_netlink_list {
 int audit_send_list(void *);
 
 extern int selinux_audit_rule_update(void);
+
+#ifdef CONFIG_AUDITSYSCALL
+extern void __audit_signal_info(int sig, struct task_struct *t);
+static inline void audit_signal_info(int sig, struct task_struct *t)
+{
+	if (unlikely(audit_pid && t->tgid == audit_pid))
+		__audit_signal_info(sig, t);
+}
+#else
+#define audit_signal_info(s,t)
+#endif
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 114f921979ec..4ca913daa7da 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1376,19 +1376,20 @@ int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt)
  * If the audit subsystem is being terminated, record the task (pid)
  * and uid that is doing that.
  */
-void audit_signal_info(int sig, struct task_struct *t)
+void __audit_signal_info(int sig, struct task_struct *t)
 {
 	extern pid_t audit_sig_pid;
 	extern uid_t audit_sig_uid;
-
-	if (unlikely(audit_pid && t->tgid == audit_pid)) {
-		if (sig == SIGTERM || sig == SIGHUP) {
-			struct audit_context *ctx = current->audit_context;
-			audit_sig_pid = current->pid;
-			if (ctx)
-				audit_sig_uid = ctx->loginuid;
-			else
-				audit_sig_uid = current->uid;
-		}
+	extern u32 audit_sig_sid;
+
+	if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+		struct task_struct *tsk = current;
+		struct audit_context *ctx = tsk->audit_context;
+		audit_sig_pid = tsk->pid;
+		if (ctx)
+			audit_sig_uid = ctx->loginuid;
+		else
+			audit_sig_uid = tsk->uid;
+		selinux_get_task_sid(tsk, &audit_sig_sid);
 	}
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index e5f8aea78ffe..1b3c921737e2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,12 +23,12 @@
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
-#include <linux/audit.h>
 #include <linux/capability.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
+#include "audit.h"	/* audit_signal_info() */
 
 /*
  * SLAB caches for signal bits.
-- 
cgit v1.2.3-59-g8ed1b


From 3c66251e573219a0532a5a07381b2f60a412d9eb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 6 May 2006 08:26:27 -0400
Subject: [PATCH] add filtering by ppid

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 1 +
 kernel/auditsc.c      | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1057e90bd3e3..8f6424f2b604 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -152,6 +152,7 @@
 #define AUDIT_SE_TYPE	15	/* security label type */
 #define AUDIT_SE_SEN	16	/* security label sensitivity label */
 #define AUDIT_SE_CLR	17	/* security label clearance label */
+#define AUDIT_PPID	18
 
 				/* These are ONLY useful when checking
 				 * at syscall exit time (AUDIT_AT_EXIT). */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4fc3867fa25a..e4551659ad79 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -188,6 +188,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_PID:
 			result = audit_comparator(tsk->pid, f->op, f->val);
 			break;
+		case AUDIT_PPID:
+			if (ctx)
+				result = audit_comparator(ctx->ppid, f->op, f->val);
+			break;
 		case AUDIT_UID:
 			result = audit_comparator(tsk->uid, f->op, f->val);
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From ac03221a4fdda9bfdabf99bcd129847f20fc1d80 Mon Sep 17 00:00:00 2001
From: Linda Knippers <linda.knippers@hp.com>
Date: Tue, 16 May 2006 22:03:48 -0400
Subject: [PATCH] update of IPC audit record cleanup

The following patch addresses most of the issues with the IPC_SET_PERM
records as described in:
https://www.redhat.com/archives/linux-audit/2006-May/msg00010.html
and addresses the comments I received on the record field names.

To summarize, I made the following changes:

1. Changed sys_msgctl() and semctl_down() so that an IPC_SET_PERM
   record is emitted in the failure case as well as the success case.
   This matches the behavior in sys_shmctl().  I could simplify the
   code in sys_msgctl() and semctl_down() slightly but it would mean
   that in some error cases we could get an IPC_SET_PERM record
   without an IPC record and that seemed odd.

2. No change to the IPC record type, given no feedback on the backward
   compatibility question.

3. Removed the qbytes field from the IPC record.  It wasn't being
   set and when audit_ipc_obj() is called from ipcperms(), the
   information isn't available.  If we want the information in the IPC
   record, more extensive changes will be necessary.  Since it only
   applies to message queues and it isn't really permission related, it
   doesn't seem worth it.

4. Removed the obj field from the IPC_SET_PERM record.  This means that
   the kern_ipc_perm argument is no longer needed.

5. Removed the spaces and renamed the IPC_SET_PERM field names.  Replaced iuid and
   igid fields with ouid and ogid in the IPC record.

I tested this with the lspp.22 kernel on an x86_64 box.  I believe it
applies cleanly on the latest kernel.

-- ljk

Signed-off-by: Linda Knippers <linda.knippers@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  4 ++--
 ipc/msg.c             |  9 +++++----
 ipc/sem.c             |  8 +++++---
 ipc/shm.c             |  2 +-
 kernel/auditsc.c      | 22 +++++-----------------
 5 files changed, 18 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 8f6424f2b604..da5f521be04b 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -325,7 +325,7 @@ extern void auditsc_get_stamp(struct audit_context *ctx,
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
 extern int audit_ipc_obj(struct kern_ipc_perm *ipcp);
-extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp);
+extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
@@ -345,7 +345,7 @@ extern int audit_set_macxattr(const char *name);
 #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
 #define audit_get_loginuid(c) ({ -1; })
 #define audit_ipc_obj(i) ({ 0; })
-#define audit_ipc_set_perm(q,u,g,m,i) ({ 0; })
+#define audit_ipc_set_perm(q,u,g,m) ({ 0; })
 #define audit_bprm(p) ({ 0; })
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
diff --git a/ipc/msg.c b/ipc/msg.c
index 7d1340ccb16b..00f015a092d2 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -454,6 +454,11 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
 	err = audit_ipc_obj(ipcp);
 	if (err)
 		goto out_unlock_up;
+	if (cmd==IPC_SET) {
+		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode);
+		if (err)
+			goto out_unlock_up;
+	}
 
 	err = -EPERM;
 	if (current->euid != ipcp->cuid && 
@@ -468,10 +473,6 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
 	switch (cmd) {
 	case IPC_SET:
 	{
-		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode, ipcp);
-		if (err)
-			goto out_unlock_up;
-
 		err = -EPERM;
 		if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
 			goto out_unlock_up;
diff --git a/ipc/sem.c b/ipc/sem.c
index 7919f8ece6ba..fce0bc8b5ad6 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -828,6 +828,11 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
 	if (err)
 		goto out_unlock;
 
+	if (cmd == IPC_SET) {
+		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
+		if (err)
+			goto out_unlock;
+	}
 	if (current->euid != ipcp->cuid && 
 	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
 	    	err=-EPERM;
@@ -844,9 +849,6 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
 		err = 0;
 		break;
 	case IPC_SET:
-		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, ipcp);
-		if (err)
-			goto out_unlock;
 		ipcp->uid = setbuf.uid;
 		ipcp->gid = setbuf.gid;
 		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
diff --git a/ipc/shm.c b/ipc/shm.c
index 809896851902..4f133d24030f 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -643,7 +643,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
 			goto out_unlock_up;
-		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, &(shp->shm_perm));
+		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode);
 		if (err)
 			goto out_unlock_up;
 		err=-EPERM;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e4551659ad79..fa4bf9625456 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -648,8 +648,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		case AUDIT_IPC: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab, 
-				 " qbytes=%lx iuid=%u igid=%u mode=%x",
-				 axi->qbytes, axi->uid, axi->gid, axi->mode);
+				 "ouid=%u ogid=%u mode=%x",
+				 axi->uid, axi->gid, axi->mode);
 			if (axi->osid != 0) {
 				char *ctx = NULL;
 				u32 len;
@@ -667,21 +667,10 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		case AUDIT_IPC_SET_PERM: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab,
-				" new qbytes=%lx new iuid=%u new igid=%u new mode=%x",
+				"qbytes=%lx ouid=%u ogid=%u mode=%x",
 				axi->qbytes, axi->uid, axi->gid, axi->mode);
-			if (axi->osid != 0) {
-				char *ctx = NULL;
-				u32 len;
-				if (selinux_ctxid_to_string(
-						axi->osid, &ctx, &len)) {
-					audit_log_format(ab, " osid=%u",
-							axi->osid);
-					call_panic = 1;
-				} else
-					audit_log_format(ab, " obj=%s", ctx);
-				kfree(ctx);
-			}
 			break; }
+
 		case AUDIT_EXECVE: {
 			struct audit_aux_data_execve *axi = (void *)aux;
 			int i;
@@ -1232,7 +1221,7 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
+int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
 	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
@@ -1248,7 +1237,6 @@ int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode,
 	ax->uid = uid;
 	ax->gid = gid;
 	ax->mode = mode;
-	selinux_get_ipc_sid(ipcp, &ax->osid);
 
 	ax->d.type = AUDIT_IPC_SET_PERM;
 	ax->d.next = context->aux;
-- 
cgit v1.2.3-59-g8ed1b


From d8945bb51a2bb6623cfa36b9ff63594f46d513aa Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 18 May 2006 16:01:30 -0400
Subject: [PATCH] inline more audit helpers

pull checks for ->audit_context into inlined wrappers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h | 24 +++++++++++++++++++++---
 kernel/auditsc.c      | 14 ++++----------
 2 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index da5f521be04b..4b62743b2e6d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -301,11 +301,16 @@ extern void audit_syscall_entry(int arch,
 				int major, unsigned long a0, unsigned long a1,
 				unsigned long a2, unsigned long a3);
 extern void audit_syscall_exit(int failed, long return_code);
-extern void audit_getname(const char *name);
+extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags);
 extern void __audit_inode_child(const char *dname, const struct inode *inode,
 				unsigned long pino);
+static inline void audit_getname(const char *name)
+{
+	if (unlikely(current->audit_context))
+		__audit_getname(name);
+}
 static inline void audit_inode(const char *name, const struct inode *inode,
 			       unsigned flags) {
 	if (unlikely(current->audit_context))
@@ -324,13 +329,26 @@ extern void auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
-extern int audit_ipc_obj(struct kern_ipc_perm *ipcp);
-extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
+extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp);
+extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode);
 extern int audit_bprm(struct linux_binprm *bprm);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
 extern int audit_set_macxattr(const char *name);
+
+static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+{
+	if (unlikely(current->audit_context))
+		return __audit_ipc_obj(ipcp);
+	return 0;
+}
+static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+{
+	if (unlikely(current->audit_context))
+		return __audit_ipc_set_perm(qbytes, uid, gid, mode);
+	return 0;
+}
 #else
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fa4bf9625456..05d31ee4f3dd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -922,11 +922,11 @@ void audit_syscall_exit(int valid, long return_code)
  * Add a name to the list of audit names for this context.
  * Called from fs/namei.c:getname().
  */
-void audit_getname(const char *name)
+void __audit_getname(const char *name)
 {
 	struct audit_context *context = current->audit_context;
 
-	if (!context || IS_ERR(name) || !name)
+	if (IS_ERR(name) || !name)
 		return;
 
 	if (!context->in_syscall) {
@@ -1189,14 +1189,11 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+int __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
 	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
 	if (!ax)
 		return -ENOMEM;
@@ -1221,14 +1218,11 @@ int audit_ipc_obj(struct kern_ipc_perm *ipcp)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
+int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode)
 {
 	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
-		return 0;
-
 	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
 	if (!ax)
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 20ca73bc792be9625af184cbec36e1372611d1c3 Mon Sep 17 00:00:00 2001
From: "George C. Wilson" <ltcgcw@us.ibm.com>
Date: Wed, 24 May 2006 16:09:55 -0500
Subject: [PATCH] Audit of POSIX Message Queue Syscalls v.2

This patch adds audit support to POSIX message queues.  It applies cleanly to
the lspp.b15 branch of Al Viro's git tree.  There are new auxiliary data
structures, and collection and emission routines in kernel/auditsc.c.  New hooks
in ipc/mqueue.c collect arguments from the syscalls.

I tested the patch by building the examples from the POSIX MQ library tarball.
Build them with -lrt, not against the old MQ library in the tarball.  Here's the URL:
http://www.geocities.com/wronski12/posix_ipc/libmqueue-4.41.tar.gz
Run auditctl -a exit,always -S for each of mq_open, mq_timedsend, mq_timedreceive,
mq_notify and mq_getsetattr.  mq_unlink has no new hooks.  Please see the
corresponding userspace patch to get correct output from auditd for the new
record types.

[fixes folded]

Signed-off-by: George Wilson <ltcgcw@us.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |  46 +++++++++
 ipc/mqueue.c          |  22 ++++
 kernel/auditsc.c      | 274 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 341 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 4b62743b2e6d..7c8780b150e6 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -85,6 +85,10 @@
 #define AUDIT_CWD		1307	/* Current working directory */
 #define AUDIT_EXECVE		1309	/* execve arguments */
 #define AUDIT_IPC_SET_PERM	1311	/* IPC new permissions record type */
+#define AUDIT_MQ_OPEN		1312	/* POSIX MQ open record type */
+#define AUDIT_MQ_SENDRECV	1313	/* POSIX MQ send/receive record type */
+#define AUDIT_MQ_NOTIFY		1314	/* POSIX MQ notify record type */
+#define AUDIT_MQ_GETSETATTR	1315	/* POSIX MQ get/set attribute record type */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -287,6 +291,8 @@ struct audit_context;
 struct inode;
 struct netlink_skb_parms;
 struct linux_binprm;
+struct mq_attr;
+struct mqstat;
 
 #define AUDITSC_INVALID 0
 #define AUDITSC_SUCCESS 1
@@ -336,6 +342,11 @@ extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
 extern int audit_set_macxattr(const char *name);
+extern int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr);
+extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout);
+extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout);
+extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification);
+extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat);
 
 static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
@@ -349,6 +360,36 @@ static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid,
 		return __audit_ipc_set_perm(qbytes, uid, gid, mode);
 	return 0;
 }
+static inline int audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+{
+	if (unlikely(current->audit_context))
+		return __audit_mq_open(oflag, mode, u_attr);
+	return 0;
+}
+static inline int audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout)
+{
+	if (unlikely(current->audit_context))
+		return __audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
+	return 0;
+}
+static inline int audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout)
+{
+	if (unlikely(current->audit_context))
+		return __audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
+	return 0;
+}
+static inline int audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+{
+	if (unlikely(current->audit_context))
+		return __audit_mq_notify(mqdes, u_notification);
+	return 0;
+}
+static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+{
+	if (unlikely(current->audit_context))
+		return __audit_mq_getsetattr(mqdes, mqstat);
+	return 0;
+}
 #else
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
@@ -369,6 +410,11 @@ static inline int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid,
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_avc_path(dentry, mnt) ({ 0; })
 #define audit_set_macxattr(n) do { ; } while (0)
+#define audit_mq_open(o,m,a) ({ 0; })
+#define audit_mq_timedsend(d,l,p,t) ({ 0; })
+#define audit_mq_timedreceive(d,l,p,t) ({ 0; })
+#define audit_mq_notify(d,n) ({ 0; })
+#define audit_mq_getsetattr(d,s) ({ 0; })
 #endif
 
 #ifdef CONFIG_AUDIT
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 41ecbd440fed..1511714a9585 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -8,6 +8,8 @@
  * Lockless receive & send, fd based notify:
  * 			    Manfred Spraul	    (manfred@colorfullife.com)
  *
+ * Audit:                   George Wilson           (ltcgcw@us.ibm.com)
+ *
  * This file is released under the GPL.
  */
 
@@ -24,6 +26,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/syscalls.h>
+#include <linux/audit.h>
 #include <linux/signal.h>
 #include <linux/mutex.h>
 
@@ -657,6 +660,10 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
 	char *name;
 	int fd, error;
 
+	error = audit_mq_open(oflag, mode, u_attr);
+	if (error != 0)
+		return error;
+
 	if (IS_ERR(name = getname(u_name)))
 		return PTR_ERR(name);
 
@@ -814,6 +821,10 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
 	long timeout;
 	int ret;
 
+	ret = audit_mq_timedsend(mqdes, msg_len, msg_prio, u_abs_timeout);
+	if (ret != 0)
+		return ret;
+
 	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
 		return -EINVAL;
 
@@ -896,6 +907,10 @@ asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 	struct mqueue_inode_info *info;
 	struct ext_wait_queue wait;
 
+	ret = audit_mq_timedreceive(mqdes, msg_len, u_msg_prio, u_abs_timeout);
+	if (ret != 0)
+		return ret;
+
 	timeout = prepare_timeout(u_abs_timeout);
 
 	ret = -EBADF;
@@ -975,6 +990,10 @@ asmlinkage long sys_mq_notify(mqd_t mqdes,
 	struct mqueue_inode_info *info;
 	struct sk_buff *nc;
 
+	ret = audit_mq_notify(mqdes, u_notification);
+	if (ret != 0)
+		return ret;
+
 	nc = NULL;
 	sock = NULL;
 	if (u_notification != NULL) {
@@ -1115,6 +1134,9 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
 	omqstat = info->attr;
 	omqstat.mq_flags = filp->f_flags & O_NONBLOCK;
 	if (u_mqstat) {
+		ret = audit_mq_getsetattr(mqdes, &mqstat);
+		if (ret != 0)
+			goto out;
 		if (mqstat.mq_flags & O_NONBLOCK)
 			filp->f_flags |= O_NONBLOCK;
 		else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4503c4663cf8..14e295a4121b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
  * Copyright 2005 Hewlett-Packard Development Company, L.P.
- * Copyright (C) 2005 IBM Corporation
+ * Copyright (C) 2005, 2006 IBM Corporation
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -29,6 +29,9 @@
  * this file -- see entry.S) is based on a GPL'd patch written by
  * okir@suse.de and Copyright 2003 SuSE Linux AG.
  *
+ * POSIX message queue support added by George Wilson <ltcgcw@us.ibm.com>,
+ * 2006.
+ *
  * The support of additional filter rules compares (>, <, >=, <=) was
  * added by Dustin Kirkland <dustin.kirkland@us.ibm.com>, 2005.
  *
@@ -49,6 +52,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/socket.h>
+#include <linux/mqueue.h>
 #include <linux/audit.h>
 #include <linux/personality.h>
 #include <linux/time.h>
@@ -102,6 +106,33 @@ struct audit_aux_data {
 
 #define AUDIT_AUX_IPCPERM	0
 
+struct audit_aux_data_mq_open {
+	struct audit_aux_data	d;
+	int			oflag;
+	mode_t			mode;
+	struct mq_attr		attr;
+};
+
+struct audit_aux_data_mq_sendrecv {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	size_t			msg_len;
+	unsigned int		msg_prio;
+	struct timespec		abs_timeout;
+};
+
+struct audit_aux_data_mq_notify {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	struct sigevent 	notification;
+};
+
+struct audit_aux_data_mq_getsetattr {
+	struct audit_aux_data	d;
+	mqd_t			mqdes;
+	struct mq_attr 		mqstat;
+};
+
 struct audit_aux_data_ipcctl {
 	struct audit_aux_data	d;
 	struct ipc_perm		p;
@@ -644,6 +675,43 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			continue; /* audit_panic has been called */
 
 		switch (aux->type) {
+		case AUDIT_MQ_OPEN: {
+			struct audit_aux_data_mq_open *axi = (void *)aux;
+			audit_log_format(ab,
+				"oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld "
+				"mq_msgsize=%ld mq_curmsgs=%ld",
+				axi->oflag, axi->mode, axi->attr.mq_flags,
+				axi->attr.mq_maxmsg, axi->attr.mq_msgsize,
+				axi->attr.mq_curmsgs);
+			break; }
+
+		case AUDIT_MQ_SENDRECV: {
+			struct audit_aux_data_mq_sendrecv *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d msg_len=%zd msg_prio=%u "
+				"abs_timeout_sec=%ld abs_timeout_nsec=%ld",
+				axi->mqdes, axi->msg_len, axi->msg_prio,
+				axi->abs_timeout.tv_sec, axi->abs_timeout.tv_nsec);
+			break; }
+
+		case AUDIT_MQ_NOTIFY: {
+			struct audit_aux_data_mq_notify *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d sigev_signo=%d",
+				axi->mqdes,
+				axi->notification.sigev_signo);
+			break; }
+
+		case AUDIT_MQ_GETSETATTR: {
+			struct audit_aux_data_mq_getsetattr *axi = (void *)aux;
+			audit_log_format(ab,
+				"mqdes=%d mq_flags=0x%lx mq_maxmsg=%ld mq_msgsize=%ld "
+				"mq_curmsgs=%ld ",
+				axi->mqdes,
+				axi->mqstat.mq_flags, axi->mqstat.mq_maxmsg,
+				axi->mqstat.mq_msgsize, axi->mqstat.mq_curmsgs);
+			break; }
+
 		case AUDIT_IPC: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab, 
@@ -1182,6 +1250,210 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
 	return ctx ? ctx->loginuid : -1;
 }
 
+/**
+ * __audit_mq_open - record audit data for a POSIX MQ open
+ * @oflag: open flag
+ * @mode: mode bits
+ * @u_attr: queue attributes
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_open(int oflag, mode_t mode, struct mq_attr __user *u_attr)
+{
+	struct audit_aux_data_mq_open *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_attr != NULL) {
+		if (copy_from_user(&ax->attr, u_attr, sizeof(ax->attr))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->attr, 0, sizeof(ax->attr));
+
+	ax->oflag = oflag;
+	ax->mode = mode;
+
+	ax->d.type = AUDIT_MQ_OPEN;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_timedsend - record audit data for a POSIX MQ timed send
+ * @mqdes: MQ descriptor
+ * @msg_len: Message length
+ * @msg_prio: Message priority
+ * @u_abs_timeout: Message timeout in absolute time
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
+			const struct timespec __user *u_abs_timeout)
+{
+	struct audit_aux_data_mq_sendrecv *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_abs_timeout != NULL) {
+		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+
+	ax->mqdes = mqdes;
+	ax->msg_len = msg_len;
+	ax->msg_prio = msg_prio;
+
+	ax->d.type = AUDIT_MQ_SENDRECV;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_timedreceive - record audit data for a POSIX MQ timed receive
+ * @mqdes: MQ descriptor
+ * @msg_len: Message length
+ * @u_msg_prio: Message priority
+ * @u_abs_timeout: Message timeout in absolute time
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len,
+				unsigned int __user *u_msg_prio,
+				const struct timespec __user *u_abs_timeout)
+{
+	struct audit_aux_data_mq_sendrecv *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_msg_prio != NULL) {
+		if (get_user(ax->msg_prio, u_msg_prio)) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		ax->msg_prio = 0;
+
+	if (u_abs_timeout != NULL) {
+		if (copy_from_user(&ax->abs_timeout, u_abs_timeout, sizeof(ax->abs_timeout))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->abs_timeout, 0, sizeof(ax->abs_timeout));
+
+	ax->mqdes = mqdes;
+	ax->msg_len = msg_len;
+
+	ax->d.type = AUDIT_MQ_SENDRECV;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_notify - record audit data for a POSIX MQ notify
+ * @mqdes: MQ descriptor
+ * @u_notification: Notification event
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+
+int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+{
+	struct audit_aux_data_mq_notify *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	if (u_notification != NULL) {
+		if (copy_from_user(&ax->notification, u_notification, sizeof(ax->notification))) {
+			kfree(ax);
+			return -EFAULT;
+		}
+	} else
+		memset(&ax->notification, 0, sizeof(ax->notification));
+
+	ax->mqdes = mqdes;
+
+	ax->d.type = AUDIT_MQ_NOTIFY;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * __audit_mq_getsetattr - record audit data for a POSIX MQ get/set attribute
+ * @mqdes: MQ descriptor
+ * @mqstat: MQ attributes
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
+{
+	struct audit_aux_data_mq_getsetattr *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (!audit_enabled)
+		return 0;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	ax->mqdes = mqdes;
+	ax->mqstat = *mqstat;
+
+	ax->d.type = AUDIT_MQ_GETSETATTR;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
 /**
  * audit_ipc_obj - record audit data for ipc object
  * @ipcp: ipc permissions
-- 
cgit v1.2.3-59-g8ed1b


From f368c07d7214a7c41dfceb76c8db473b850f0229 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Fri, 7 Apr 2006 16:55:56 -0400
Subject: [PATCH] audit: path-based rules

In this implementation, audit registers inotify watches on the parent
directories of paths specified in audit rules.  When audit's inotify
event handler is called, it updates any affected rules based on the
filesystem event.  If the parent directory is renamed, removed, or its
filesystem is unmounted, audit removes all rules referencing that
inotify watch.

To keep things simple, this implementation limits location-based
auditing to the directory entries in an existing directory.  Given
a path-based rule for /foo/bar/passwd, the following table applies:

    passwd modified -- audit event logged
    passwd replaced -- audit event logged, rules list updated
    bar renamed     -- rule removed
    foo renamed     -- untracked, meaning that the rule now applies to
		       the new location

Audit users typically want to have many rules referencing filesystem
objects, which can significantly impact filtering performance.  This
patch also adds an inode-number-based rule hash to mitigate this
situation.

The patch is relative to the audit git tree:
http://kernel.org/git/?p=linux/kernel/git/viro/audit-current.git;a=summary
and uses the inotify kernel API:
http://lkml.org/lkml/2006/6/1/145

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/audit.h |   1 +
 init/Kconfig          |   3 +-
 kernel/audit.c        |  41 ++-
 kernel/audit.h        |  38 ++-
 kernel/auditfilter.c  | 785 +++++++++++++++++++++++++++++++++++++++++++++++---
 kernel/auditsc.c      | 124 +++++---
 6 files changed, 903 insertions(+), 89 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 7c8780b150e6..c78327507f4e 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -165,6 +165,7 @@
 #define AUDIT_INODE	102
 #define AUDIT_EXIT	103
 #define AUDIT_SUCCESS   104	/* exit >= 0; value ignored */
+#define AUDIT_WATCH	105
 
 #define AUDIT_ARG0      200
 #define AUDIT_ARG1      (AUDIT_ARG0+1)
diff --git a/init/Kconfig b/init/Kconfig
index 3b36a1d53656..c4d0fa655d5d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -182,7 +182,8 @@ config AUDITSYSCALL
 	help
 	  Enable low-overhead system-call auditing infrastructure that
 	  can be used independently or with another kernel subsystem,
-	  such as SELinux.
+	  such as SELinux.  To use audit's filesystem watch feature, please
+	  ensure that INOTIFY is configured.
 
 config IKCONFIG
 	bool "Kernel .config support"
diff --git a/kernel/audit.c b/kernel/audit.c
index 0738a4b290e6..0fbf1c116363 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -56,6 +56,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/selinux.h>
+#include <linux/inotify.h>
 
 #include "audit.h"
 
@@ -103,6 +104,12 @@ static atomic_t    audit_lost = ATOMIC_INIT(0);
 /* The netlink socket. */
 static struct sock *audit_sock;
 
+/* Inotify handle. */
+struct inotify_handle *audit_ih;
+
+/* Hash for inode-based rules */
+struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
 /* The audit_freelist is a list of pre-allocated audit buffers (if more
  * than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
  * being placed on the freelist). */
@@ -115,10 +122,8 @@ static struct task_struct *kauditd_task;
 static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait);
 static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait);
 
-/* The netlink socket is only to be read by 1 CPU, which lets us assume
- * that list additions and deletions never happen simultaneously in
- * auditsc.c */
-DEFINE_MUTEX(audit_netlink_mutex);
+/* Serialize requests from userspace. */
+static DEFINE_MUTEX(audit_cmd_mutex);
 
 /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting
  * audit records.  Since printk uses a 1024 byte buffer, this buffer
@@ -373,8 +378,8 @@ int audit_send_list(void *_dest)
 	struct sk_buff *skb;
 
 	/* wait for parent to finish and send an ACK */
-	mutex_lock(&audit_netlink_mutex);
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_lock(&audit_cmd_mutex);
+	mutex_unlock(&audit_cmd_mutex);
 
 	while ((skb = __skb_dequeue(&dest->q)) != NULL)
 		netlink_unicast(audit_sock, skb, pid, 0);
@@ -665,20 +670,30 @@ static void audit_receive(struct sock *sk, int length)
 	struct sk_buff  *skb;
 	unsigned int qlen;
 
-	mutex_lock(&audit_netlink_mutex);
+	mutex_lock(&audit_cmd_mutex);
 
 	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		audit_receive_skb(skb);
 		kfree_skb(skb);
 	}
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_unlock(&audit_cmd_mutex);
 }
 
+#ifdef CONFIG_AUDITSYSCALL
+static const struct inotify_operations audit_inotify_ops = {
+	.handle_event	= audit_handle_ievent,
+	.destroy_watch	= audit_free_parent,
+};
+#endif
 
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
+#ifdef CONFIG_AUDITSYSCALL
+	int i;
+#endif
+
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
@@ -697,6 +712,16 @@ static int __init audit_init(void)
 	selinux_audit_set_callback(&selinux_audit_rule_update);
 
 	audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
+
+#ifdef CONFIG_AUDITSYSCALL
+	audit_ih = inotify_init(&audit_inotify_ops);
+	if (IS_ERR(audit_ih))
+		audit_panic("cannot initialize inotify handle");
+
+	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
+		INIT_LIST_HEAD(&audit_inode_hash[i]);
+#endif
+
 	return 0;
 }
 __initcall(audit_init);
diff --git a/kernel/audit.h b/kernel/audit.h
index 52cb1e31d522..58fa44cb8d01 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -19,7 +19,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/audit.h>
 #include <linux/skbuff.h>
@@ -54,6 +53,18 @@ enum audit_state {
 };
 
 /* Rule lists */
+struct audit_parent;
+
+struct audit_watch {
+	atomic_t		count;	/* reference count */
+	char			*path;	/* insertion path */
+	dev_t			dev;	/* associated superblock device */
+	unsigned long		ino;	/* associated inode number */
+	struct audit_parent	*parent; /* associated parent */
+	struct list_head	wlist;	/* entry in parent->watches list */
+	struct list_head	rules;	/* associated rules */
+};
+
 struct audit_field {
 	u32				type;
 	u32				val;
@@ -71,6 +82,9 @@ struct audit_krule {
 	u32			buflen; /* for data alloc on list rules */
 	u32			field_count;
 	struct audit_field	*fields;
+	struct audit_field	*inode_f; /* quick access to an inode field */
+	struct audit_watch	*watch;	/* associated watch */
+	struct list_head	rlist;	/* entry in audit_watch.rules list */
 };
 
 struct audit_entry {
@@ -79,10 +93,18 @@ struct audit_entry {
 	struct audit_krule	rule;
 };
 
-
 extern int audit_pid;
-extern int audit_comparator(const u32 left, const u32 op, const u32 right);
 
+#define AUDIT_INODE_BUCKETS	32
+extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+
+static inline int audit_hash_ino(u32 ino)
+{
+	return (ino & (AUDIT_INODE_BUCKETS-1));
+}
+
+extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_compare_dname_path(const char *dname, const char *path);
 extern struct sk_buff *	    audit_make_reply(int pid, int seq, int type,
 					     int done, int multi,
 					     void *payload, int size);
@@ -91,7 +113,6 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 					     void *payload, int size);
 extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
-extern struct mutex audit_netlink_mutex;
 
 struct audit_netlink_list {
 	int pid;
@@ -100,6 +121,10 @@ struct audit_netlink_list {
 
 int audit_send_list(void *);
 
+struct inotify_watch;
+extern void audit_free_parent(struct inotify_watch *);
+extern void audit_handle_ievent(struct inotify_watch *, u32, u32, u32,
+				const char *, struct inode *);
 extern int selinux_audit_rule_update(void);
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -109,6 +134,11 @@ static inline void audit_signal_info(int sig, struct task_struct *t)
 	if (unlikely(audit_pid && t->tgid == audit_pid))
 		__audit_signal_info(sig, t);
 }
+extern enum audit_state audit_filter_inodes(struct task_struct *,
+					    struct audit_context *);
+extern void audit_set_auditable(struct audit_context *);
 #else
 #define audit_signal_info(s,t)
+#define audit_filter_inodes(t,c) AUDIT_DISABLED
+#define audit_set_auditable(c)
 #endif
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index df9503da40fb..03a6919103d4 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -22,13 +22,59 @@
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/netlink.h>
+#include <linux/sched.h>
+#include <linux/inotify.h>
 #include <linux/selinux.h>
 #include "audit.h"
 
-/* There are three lists of rules -- one to search at task creation
- * time, one to search at syscall entry time, and another to search at
- * syscall exit time. */
+/*
+ * Locking model:
+ *
+ * audit_filter_mutex:
+ * 		Synchronizes writes and blocking reads of audit's filterlist
+ * 		data.  RCU is used to traverse the filterlist and access
+ * 		contents of structs audit_entry, audit_watch and opaque
+ * 		selinux rules during filtering.  If modified, these structures
+ * 		must be copied and replace their counterparts in the filterlist.
+ * 		An audit_parent struct is not accessed during filtering, so may
+ * 		be written directly provided audit_filter_mutex is held.
+ */
+
+/*
+ * Reference counting:
+ *
+ * audit_parent: lifetime is from audit_init_parent() to receipt of an IN_IGNORED
+ * 	event.  Each audit_watch holds a reference to its associated parent.
+ *
+ * audit_watch: if added to lists, lifetime is from audit_init_watch() to
+ * 	audit_remove_watch().  Additionally, an audit_watch may exist
+ * 	temporarily to assist in searching existing filter data.  Each
+ * 	audit_krule holds a reference to its associated watch.
+ */
+
+struct audit_parent {
+	struct list_head	ilist;	/* entry in inotify registration list */
+	struct list_head	watches; /* associated watches */
+	struct inotify_watch	wdata;	/* inotify watch data */
+	unsigned		flags;	/* status flags */
+};
+
+/*
+ * audit_parent status flags:
+ *
+ * AUDIT_PARENT_INVALID - set anytime rules/watches are auto-removed due to
+ * a filesystem event to ensure we're adding audit watches to a valid parent.
+ * Technically not needed for IN_DELETE_SELF or IN_UNMOUNT events, as we cannot
+ * receive them while we have nameidata, but must be used for IN_MOVE_SELF which
+ * we can receive while holding nameidata.
+ */
+#define AUDIT_PARENT_INVALID	0x001
+
+/* Audit filter lists, defined in <linux/audit.h> */
 struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 	LIST_HEAD_INIT(audit_filter_list[0]),
 	LIST_HEAD_INIT(audit_filter_list[1]),
@@ -41,9 +87,53 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 #endif
 };
 
+static DEFINE_MUTEX(audit_filter_mutex);
+
+/* Inotify handle */
+extern struct inotify_handle *audit_ih;
+
+/* Inotify events we care about. */
+#define AUDIT_IN_WATCH (IN_MOVE|IN_CREATE|IN_DELETE|IN_DELETE_SELF|IN_MOVE_SELF)
+
+void audit_free_parent(struct inotify_watch *i_watch)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+	WARN_ON(!list_empty(&parent->watches));
+	kfree(parent);
+}
+
+static inline void audit_get_watch(struct audit_watch *watch)
+{
+	atomic_inc(&watch->count);
+}
+
+static void audit_put_watch(struct audit_watch *watch)
+{
+	if (atomic_dec_and_test(&watch->count)) {
+		WARN_ON(watch->parent);
+		WARN_ON(!list_empty(&watch->rules));
+		kfree(watch->path);
+		kfree(watch);
+	}
+}
+
+static void audit_remove_watch(struct audit_watch *watch)
+{
+	list_del(&watch->wlist);
+	put_inotify_watch(&watch->parent->wdata);
+	watch->parent = NULL;
+	audit_put_watch(watch); /* match initial get */
+}
+
 static inline void audit_free_rule(struct audit_entry *e)
 {
 	int i;
+
+	/* some rules don't have associated watches */
+	if (e->rule.watch)
+		audit_put_watch(e->rule.watch);
 	if (e->rule.fields)
 		for (i = 0; i < e->rule.field_count; i++) {
 			struct audit_field *f = &e->rule.fields[i];
@@ -60,6 +150,50 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
 	audit_free_rule(e);
 }
 
+/* Initialize a parent watch entry. */
+static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+{
+	struct audit_parent *parent;
+	s32 wd;
+
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (unlikely(!parent))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&parent->watches);
+	parent->flags = 0;
+
+	inotify_init_watch(&parent->wdata);
+	/* grab a ref so inotify watch hangs around until we take audit_filter_mutex */
+	get_inotify_watch(&parent->wdata);
+	wd = inotify_add_watch(audit_ih, &parent->wdata, ndp->dentry->d_inode,
+			       AUDIT_IN_WATCH);
+	if (wd < 0) {
+		audit_free_parent(&parent->wdata);
+		return ERR_PTR(wd);
+	}
+
+	return parent;
+}
+
+/* Initialize a watch entry. */
+static struct audit_watch *audit_init_watch(char *path)
+{
+	struct audit_watch *watch;
+
+	watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+	if (unlikely(!watch))
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&watch->rules);
+	atomic_set(&watch->count, 1);
+	watch->path = path;
+	watch->dev = (dev_t)-1;
+	watch->ino = (unsigned long)-1;
+
+	return watch;
+}
+
 /* Initialize an audit filterlist entry. */
 static inline struct audit_entry *audit_init_entry(u32 field_count)
 {
@@ -107,6 +241,43 @@ static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
 	return str;
 }
 
+/* Translate an inode field to kernel representation. */
+static inline int audit_to_inode(struct audit_krule *krule,
+				 struct audit_field *f)
+{
+	if (krule->listnr != AUDIT_FILTER_EXIT ||
+	    krule->watch || krule->inode_f)
+		return -EINVAL;
+
+	krule->inode_f = f;
+	return 0;
+}
+
+/* Translate a watch string to kernel representation. */
+static int audit_to_watch(struct audit_krule *krule, char *path, int len,
+			  u32 op)
+{
+	struct audit_watch *watch;
+
+	if (!audit_ih)
+		return -EOPNOTSUPP;
+
+	if (path[0] != '/' || path[len-1] == '/' ||
+	    krule->listnr != AUDIT_FILTER_EXIT ||
+	    op & ~AUDIT_EQUAL ||
+	    krule->inode_f || krule->watch) /* 1 inode # per rule, for hash */
+		return -EINVAL;
+
+	watch = audit_init_watch(path);
+	if (unlikely(IS_ERR(watch)))
+		return PTR_ERR(watch);
+
+	audit_get_watch(watch);
+	krule->watch = watch;
+
+	return 0;
+}
+
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
@@ -161,6 +332,7 @@ exit_err:
 static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 {
 	struct audit_entry *entry;
+	struct audit_field *f;
 	int err = 0;
 	int i;
 
@@ -175,14 +347,23 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
-		if (f->type & AUDIT_UNUSED_BITS ||
-		    f->type == AUDIT_SE_USER ||
-		    f->type == AUDIT_SE_ROLE ||
-		    f->type == AUDIT_SE_TYPE ||
-		    f->type == AUDIT_SE_SEN ||
-		    f->type == AUDIT_SE_CLR) {
-			err = -EINVAL;
+		err = -EINVAL;
+		if (f->type & AUDIT_UNUSED_BITS)
+			goto exit_free;
+
+		switch(f->type) {
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+		case AUDIT_WATCH:
 			goto exit_free;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
 		}
 
 		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
@@ -199,6 +380,18 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		}
 	}
 
+	/* only '=' rules are hashed by inode; '!=' rules are not */
+	f = entry->rule.inode_f;
+	if (f) {
+		if (f->op == AUDIT_NOT_EQUAL) {
+			entry->rule.inode_f = NULL;
+		} else if (f->op != AUDIT_EQUAL) {
+			/* err may still be 0 from the last field */
+			err = -EINVAL;
+			goto exit_free;
+		}
+	}
+
 exit_nofree:
 	return entry;
 
@@ -213,6 +406,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 {
 	int err = 0;
 	struct audit_entry *entry;
+	struct audit_field *f;
 	void *bufp;
 	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
@@ -263,6 +457,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			} else
 				f->se_str = str;
 			break;
+		case AUDIT_WATCH:
+			str = audit_unpack_string(&bufp, &remain, f->val);
+			if (IS_ERR(str))
+				goto exit_free;
+			entry->rule.buflen += f->val;
+
+			err = audit_to_watch(&entry->rule, str, f->val, f->op);
+			if (err) {
+				kfree(str);
+				goto exit_free;
+			}
+			break;
+		case AUDIT_INODE:
+			err = audit_to_inode(&entry->rule, f);
+			if (err)
+				goto exit_free;
+			break;
+		}
+	}
+
+	/* only '=' rules are hashed by inode; '!=' rules are not */
+	f = entry->rule.inode_f;
+	if (f) {
+		if (f->op == AUDIT_NOT_EQUAL) {
+			entry->rule.inode_f = NULL;
+		} else if (f->op != AUDIT_EQUAL) {
+			/* err may still be 0 from the last field */
+			err = -EINVAL;
+			goto exit_free;
 		}
 	}
 
@@ -346,6 +569,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 			data->buflen += data->values[i] =
 				audit_pack_string(&bufp, f->se_str);
 			break;
+		case AUDIT_WATCH:
+			data->buflen += data->values[i] =
+				audit_pack_string(&bufp, krule->watch->path);
+			break;
 		default:
 			data->values[i] = f->val;
 		}
@@ -381,6 +608,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 			if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
 				return 1;
 			break;
+		case AUDIT_WATCH:
+			if (strcmp(a->watch->path, b->watch->path))
+				return 1;
+			break;
 		default:
 			if (a->fields[i].val != b->fields[i].val)
 				return 1;
@@ -394,6 +625,32 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 	return 0;
 }
 
+/* Duplicate the given audit watch.  The new watch's rules list is initialized
+ * to an empty list and wlist is undefined. */
+static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
+{
+	char *path;
+	struct audit_watch *new;
+
+	path = kstrdup(old->path, GFP_KERNEL);
+	if (unlikely(!path))
+		return ERR_PTR(-ENOMEM);
+
+	new = audit_init_watch(path);
+	if (unlikely(IS_ERR(new))) {
+		kfree(path);
+		goto out;
+	}
+
+	new->dev = old->dev;
+	new->ino = old->ino;
+	get_inotify_watch(&old->parent->wdata);
+	new->parent = old->parent;
+
+out:
+	return new;
+}
+
 /* Duplicate selinux field information.  The se_rule is opaque, so must be
  * re-initialized. */
 static inline int audit_dupe_selinux_field(struct audit_field *df,
@@ -425,8 +682,11 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
 /* Duplicate an audit rule.  This will be a deep copy with the exception
  * of the watch - that pointer is carried over.  The selinux specific fields
  * will be updated in the copy.  The point is to be able to replace the old
- * rule with the new rule in the filterlist, then free the old rule. */
-static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+ * rule with the new rule in the filterlist, then free the old rule.
+ * The rlist element is undefined; list manipulations are handled apart from
+ * the initial copy. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
+					   struct audit_watch *watch)
 {
 	u32 fcount = old->field_count;
 	struct audit_entry *entry;
@@ -445,6 +705,8 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		new->mask[i] = old->mask[i];
 	new->buflen = old->buflen;
+	new->inode_f = old->inode_f;
+	new->watch = NULL;
 	new->field_count = old->field_count;
 	memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
 
@@ -466,21 +728,318 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
 		}
 	}
 
+	if (watch) {
+		audit_get_watch(watch);
+		new->watch = watch;
+	}
+
 	return entry;
 }
 
-/* Add rule to given filterlist if not a duplicate.  Protected by
- * audit_netlink_mutex. */
+/* Update inode info in audit rules based on filesystem event. */
+static void audit_update_watch(struct audit_parent *parent,
+			       const char *dname, dev_t dev,
+			       unsigned long ino, unsigned invalidating)
+{
+	struct audit_watch *owatch, *nwatch, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *oentry, *nentry;
+	struct audit_buffer *ab;
+
+	mutex_lock(&audit_filter_mutex);
+	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
+		if (audit_compare_dname_path(dname, owatch->path))
+			continue;
+
+		/* If invalidating rules, do the inode-based filtering now so we
+		 * don't omit records; tasks without an audit context are skipped. */
+		if (invalidating && current->audit_context &&
+		    audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
+			audit_set_auditable(current->audit_context);
+
+		nwatch = audit_dupe_watch(owatch);
+		if (unlikely(IS_ERR(nwatch))) {
+			mutex_unlock(&audit_filter_mutex);
+			audit_panic("error updating watch, skipping");
+			return;
+		}
+		nwatch->dev = dev;
+		nwatch->ino = ino;
+
+		list_for_each_entry_safe(r, nextr, &owatch->rules, rlist) {
+
+			oentry = container_of(r, struct audit_entry, rule);
+			list_del(&oentry->rule.rlist);
+			list_del_rcu(&oentry->list);
+
+			nentry = audit_dupe_rule(&oentry->rule, nwatch);
+			if (unlikely(IS_ERR(nentry)))
+				audit_panic("error updating watch, removing");
+			else {
+				int h = audit_hash_ino((u32)ino);
+				list_add(&nentry->rule.rlist, &nwatch->rules);
+				list_add_rcu(&nentry->list, &audit_inode_hash[h]);
+			}
+
+			call_rcu(&oentry->rcu, audit_free_rule_rcu);
+		}
+
+		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+		audit_log_format(ab, "audit updated rules specifying watch=");
+		audit_log_untrustedstring(ab, owatch->path);
+		audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
+		audit_log_end(ab);
+
+		audit_remove_watch(owatch);
+		goto add_watch_to_parent; /* event applies to a single watch */
+	}
+	mutex_unlock(&audit_filter_mutex);
+	return;
+
+add_watch_to_parent:
+	list_add(&nwatch->wlist, &parent->watches);
+	mutex_unlock(&audit_filter_mutex);
+	return;
+}
+
+/* Remove all watches & rules associated with a parent that is going away. */
+static void audit_remove_parent_watches(struct audit_parent *parent)
+{
+	struct audit_watch *w, *nextw;
+	struct audit_krule *r, *nextr;
+	struct audit_entry *e;
+
+	mutex_lock(&audit_filter_mutex);
+	parent->flags |= AUDIT_PARENT_INVALID;
+	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
+		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
+			e = container_of(r, struct audit_entry, rule);
+			list_del(&r->rlist);
+			list_del_rcu(&e->list);
+			call_rcu(&e->rcu, audit_free_rule_rcu);
+
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				 "audit implicitly removed rule from list=%d\n",
+				  AUDIT_FILTER_EXIT);
+		}
+		audit_remove_watch(w);
+	}
+	mutex_unlock(&audit_filter_mutex);
+}
+
+/* Unregister inotify watches for parents on in_list.
+ * Generates an IN_IGNORED event. */
+static void audit_inotify_unregister(struct list_head *in_list)
+{
+	struct audit_parent *p, *n;
+
+	list_for_each_entry_safe(p, n, in_list, ilist) {
+		list_del(&p->ilist);
+		inotify_rm_watch(audit_ih, &p->wdata);
+		/* the put matching the get in audit_del_rule() */
+		put_inotify_watch(&p->wdata);
+	}
+}
+
+/* Find an existing audit rule.
+ * Caller must hold audit_filter_mutex to prevent stale rule data. */
+static struct audit_entry *audit_find_rule(struct audit_entry *entry,
+					   struct list_head *list)
+{
+	struct audit_entry *e, *found = NULL;
+	int h;
+
+	if (entry->rule.watch) {
+		/* we don't know the inode number, so must walk entire hash */
+		for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
+			list = &audit_inode_hash[h];
+			list_for_each_entry(e, list, list)
+				if (!audit_compare_rule(&entry->rule, &e->rule)) {
+					found = e;
+					goto out;
+				}
+		}
+		goto out;
+	}
+
+	list_for_each_entry(e, list, list)
+		if (!audit_compare_rule(&entry->rule, &e->rule)) {
+			found = e;
+			goto out;
+		}
+
+out:
+	return found;
+}
+
+/* Get path information necessary for adding watches. */
+static int audit_get_nd(char *path, struct nameidata **ndp,
+			struct nameidata **ndw)
+{
+	struct nameidata *ndparent, *ndwatch;
+	int err;
+
+	ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
+	if (unlikely(!ndparent))
+		return -ENOMEM;
+
+	ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
+	if (unlikely(!ndwatch)) {
+		kfree(ndparent);
+		return -ENOMEM;
+	}
+
+	err = path_lookup(path, LOOKUP_PARENT, ndparent);
+	if (err) {
+		kfree(ndparent);
+		kfree(ndwatch);
+		return err;
+	}
+
+	err = path_lookup(path, 0, ndwatch);
+	if (err) {
+		kfree(ndwatch);
+		ndwatch = NULL;
+	}
+
+	*ndp = ndparent;
+	*ndw = ndwatch;
+
+	return 0;
+}
+
+/* Release resources used for watch path information. */
+static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
+{
+	if (ndp) {
+		path_release(ndp);
+		kfree(ndp);
+	}
+	if (ndw) {
+		path_release(ndw);
+		kfree(ndw);
+	}
+}
+
+/* Associate the given rule with an existing parent inotify_watch.
+ * Caller must hold audit_filter_mutex. */
+static void audit_add_to_parent(struct audit_krule *krule,
+				struct audit_parent *parent)
+{
+	struct audit_watch *w, *watch = krule->watch;
+	int watch_found = 0;
+
+	list_for_each_entry(w, &parent->watches, wlist) {
+		if (strcmp(watch->path, w->path))
+			continue;
+
+		watch_found = 1;
+
+		/* put krule's and initial refs to temporary watch */
+		audit_put_watch(watch);
+		audit_put_watch(watch);
+
+		audit_get_watch(w);
+		krule->watch = watch = w;
+		break;
+	}
+
+	if (!watch_found) {
+		get_inotify_watch(&parent->wdata);
+		watch->parent = parent;
+
+		list_add(&watch->wlist, &parent->watches);
+	}
+	list_add(&krule->rlist, &watch->rules);
+}
+
+/* Find a matching watch entry, or add this one.
+ * Caller must hold audit_filter_mutex. */
+static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
+			   struct nameidata *ndw)
+{
+	struct audit_watch *watch = krule->watch;
+	struct inotify_watch *i_watch;
+	struct audit_parent *parent;
+	int ret = 0;
+
+	/* update watch filter fields */
+	if (ndw) {
+		watch->dev = ndw->dentry->d_inode->i_sb->s_dev;
+		watch->ino = ndw->dentry->d_inode->i_ino;
+	}
+
+	/* The audit_filter_mutex must not be held during inotify calls because
+	 * we hold it during inotify event callback processing.  If an existing
+	 * inotify watch is found, inotify_find_watch() grabs a reference before
+	 * returning.
+	 */
+	mutex_unlock(&audit_filter_mutex);
+
+	if (inotify_find_watch(audit_ih, ndp->dentry->d_inode, &i_watch) < 0) {
+		parent = audit_init_parent(ndp);
+		if (IS_ERR(parent)) {
+			/* caller expects mutex locked */
+			mutex_lock(&audit_filter_mutex);
+			return PTR_ERR(parent);
+		}
+	} else
+		parent = container_of(i_watch, struct audit_parent, wdata);
+
+	mutex_lock(&audit_filter_mutex);
+
+	/* parent was moved before we took audit_filter_mutex */
+	if (parent->flags & AUDIT_PARENT_INVALID)
+		ret = -ENOENT;
+	else
+		audit_add_to_parent(krule, parent);
+
+	/* match get in audit_init_parent or inotify_find_watch */
+	put_inotify_watch(&parent->wdata);
+	return ret;
+}
+
+/* Add rule to given filterlist if not a duplicate. */
 static inline int audit_add_rule(struct audit_entry *entry,
-				  struct list_head *list)
+				 struct list_head *list)
 {
 	struct audit_entry *e;
+	struct audit_field *inode_f = entry->rule.inode_f;
+	struct audit_watch *watch = entry->rule.watch;
+	struct nameidata *ndp, *ndw;
+	int h, err, putnd_needed = 0;
+
+	if (inode_f) {
+		h = audit_hash_ino(inode_f->val);
+		list = &audit_inode_hash[h];
+	}
+
+	mutex_lock(&audit_filter_mutex);
+	e = audit_find_rule(entry, list);
+	mutex_unlock(&audit_filter_mutex);
+	if (e) {
+		err = -EEXIST;
+		goto error;
+	}
 
-	/* Do not use the _rcu iterator here, since this is the only
-	 * addition routine. */
-	list_for_each_entry(e, list, list) {
-		if (!audit_compare_rule(&entry->rule, &e->rule))
-			return -EEXIST;
+	/* Avoid calling path_lookup under audit_filter_mutex. */
+	if (watch) {
+		err = audit_get_nd(watch->path, &ndp, &ndw);
+		if (err)
+			goto error;
+		putnd_needed = 1;
+	}
+
+	mutex_lock(&audit_filter_mutex);
+	if (watch) {
+		/* Drops and re-takes audit_filter_mutex; may free the tmp watch. */
+		err = audit_add_watch(&entry->rule, ndp, ndw);
+		if (err) {
+			mutex_unlock(&audit_filter_mutex);
+			goto error;
+		}
+		h = audit_hash_ino((u32)entry->rule.watch->ino);
+		list = &audit_inode_hash[h];
 	}
 
 	if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
@@ -488,27 +1047,77 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+	mutex_unlock(&audit_filter_mutex);
 
-	return 0;
+	if (putnd_needed)
+		audit_put_nd(ndp, ndw);
+
+ 	return 0;
+
+error:
+	if (putnd_needed)
+		audit_put_nd(ndp, ndw);
+	if (watch)
+		audit_put_watch(watch); /* tmp watch, matches initial get */
+	return err;
 }
 
-/* Remove an existing rule from filterlist.  Protected by
- * audit_netlink_mutex. */
+/* Remove an existing rule from filterlist. */
 static inline int audit_del_rule(struct audit_entry *entry,
 				 struct list_head *list)
 {
 	struct audit_entry  *e;
+	struct audit_field *inode_f = entry->rule.inode_f;
+	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
+	LIST_HEAD(inotify_list);
+	int h, ret = 0;
+
+	if (inode_f) {
+		h = audit_hash_ino(inode_f->val);
+		list = &audit_inode_hash[h];
+	}
 
-	/* Do not use the _rcu iterator here, since this is the only
-	 * deletion routine. */
-	list_for_each_entry(e, list, list) {
-		if (!audit_compare_rule(&entry->rule, &e->rule)) {
-			list_del_rcu(&e->list);
-			call_rcu(&e->rcu, audit_free_rule_rcu);
-			return 0;
+	mutex_lock(&audit_filter_mutex);
+	e = audit_find_rule(entry, list);
+	if (!e) {
+		mutex_unlock(&audit_filter_mutex);
+		ret = -ENOENT;
+		goto out;
+	}
+
+	watch = e->rule.watch;
+	if (watch) {
+		struct audit_parent *parent = watch->parent;
+
+		list_del(&e->rule.rlist);
+
+		if (list_empty(&watch->rules)) {
+			audit_remove_watch(watch);
+
+			if (list_empty(&parent->watches)) {
+				/* Put parent on the inotify un-registration
+				 * list.  Grab a reference before releasing
+				 * audit_filter_mutex, to be released in
+				 * audit_inotify_unregister(). */
+				list_add(&parent->ilist, &inotify_list);
+				get_inotify_watch(&parent->wdata);
+			}
 		}
 	}
-	return -ENOENT;		/* No matching rule */
+
+	list_del_rcu(&e->list);
+	call_rcu(&e->rcu, audit_free_rule_rcu);
+
+	mutex_unlock(&audit_filter_mutex);
+
+	if (!list_empty(&inotify_list))
+		audit_inotify_unregister(&inotify_list);
+
+out:
+	if (tmp_watch)
+		audit_put_watch(tmp_watch); /* match initial get */
+
+	return ret;
 }
 
 /* List rules using struct audit_rule.  Exists for backward
@@ -519,8 +1128,8 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q)
 	struct audit_entry *entry;
 	int i;
 
-	/* The *_rcu iterators not needed here because we are
-	   always called with audit_netlink_mutex held. */
+	/* This is a blocking read, so use audit_filter_mutex instead of rcu
+	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry(entry, &audit_filter_list[i], list) {
 			struct audit_rule *rule;
@@ -535,6 +1144,20 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q)
 			kfree(rule);
 		}
 	}
+	for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
+		list_for_each_entry(entry, &audit_inode_hash[i], list) {
+			struct audit_rule *rule;
+
+			rule = audit_krule_to_rule(&entry->rule);
+			if (unlikely(!rule))
+				break;
+			skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
+					 rule, sizeof(*rule));
+			if (skb)
+				skb_queue_tail(q, skb);
+			kfree(rule);
+		}
+	}
 	skb = audit_make_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0);
 	if (skb)
 		skb_queue_tail(q, skb);
@@ -547,8 +1170,8 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 	struct audit_entry *e;
 	int i;
 
-	/* The *_rcu iterators not needed here because we are
-	   always called with audit_netlink_mutex held. */
+	/* This is a blocking read, so use audit_filter_mutex instead of rcu
+	 * iterator to sync with list writers. */
 	for (i=0; i<AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry(e, &audit_filter_list[i], list) {
 			struct audit_rule_data *data;
@@ -557,7 +1180,21 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
 			if (unlikely(!data))
 				break;
 			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
-					 data, sizeof(*data));
+					 data, sizeof(*data) + data->buflen);
+			if (skb)
+				skb_queue_tail(q, skb);
+			kfree(data);
+		}
+	}
+	for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
+		list_for_each_entry(e, &audit_inode_hash[i], list) {
+			struct audit_rule_data *data;
+
+			data = audit_krule_to_data(&e->rule);
+			if (unlikely(!data))
+				break;
+			skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
+					 data, sizeof(*data) + data->buflen);
 			if (skb)
 				skb_queue_tail(q, skb);
 			kfree(data);
@@ -602,10 +1239,12 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 		dest->pid = pid;
 		skb_queue_head_init(&dest->q);
 
+		mutex_lock(&audit_filter_mutex);
 		if (type == AUDIT_LIST)
 			audit_list(pid, seq, &dest->q);
 		else
 			audit_list_rules(pid, seq, &dest->q);
+		mutex_unlock(&audit_filter_mutex);
 
 		tsk = kthread_run(audit_send_list, dest, "audit_send_list");
 		if (IS_ERR(tsk)) {
@@ -625,6 +1264,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_add_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
+
 		if (sid) {
 			char *ctx = NULL;
 			u32 len;
@@ -705,7 +1345,39 @@ int audit_comparator(const u32 left, const u32 op, const u32 right)
 	return 0;
 }
 
+/* Compare given dentry name with last component in given path,
+ * return of 0 indicates a match; NULL or an empty path never matches. */
+int audit_compare_dname_path(const char *dname, const char *path)
+{
+	int dlen, plen;
+	const char *p;
+
+	if (!dname || !path)
+		return 1;
+
+	dlen = strlen(dname);
+	plen = strlen(path);
+	if (!plen || plen < dlen)	/* empty path: nothing before path[0] */
+		return 1;
+
+	/* disregard trailing slashes */
+	p = path + plen - 1;
+	while ((*p == '/') && (p > path))
+		p--;
+
+	/* find last path component; check the remaining length first so
+	 * that p is never moved in front of path */
+	if (p - path + 1 < dlen)
+		return 1;
+	p = p - dlen + 1;
+	if (p > path) {
+		/* the component must directly follow a '/' */
+		if (p[-1] != '/')
+			return 1;
+	}
 
+	return strncmp(p, dname, dlen);
+}
 
 static int audit_filter_user_rules(struct netlink_skb_parms *cb,
 				   struct audit_krule *rule,
@@ -818,32 +1490,65 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule)
 int selinux_audit_rule_update(void)
 {
 	struct audit_entry *entry, *n, *nentry;
+	struct audit_watch *watch;
 	int i, err = 0;
 
-	/* audit_netlink_mutex synchronizes the writers */
-	mutex_lock(&audit_netlink_mutex);
+	/* audit_filter_mutex synchronizes the writers */
+	mutex_lock(&audit_filter_mutex);
 
 	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
 		list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
 			if (!audit_rule_has_selinux(&entry->rule))
 				continue;
 
-			nentry = audit_dupe_rule(&entry->rule);
+			watch = entry->rule.watch;
+			nentry = audit_dupe_rule(&entry->rule, watch);
 			if (unlikely(IS_ERR(nentry))) {
 				/* save the first error encountered for the
 				 * return value */
 				if (!err)
 					err = PTR_ERR(nentry);
 				audit_panic("error updating selinux filters");
+				if (watch)
+					list_del(&entry->rule.rlist);
 				list_del_rcu(&entry->list);
 			} else {
+				if (watch) {
+					list_add(&nentry->rule.rlist,
+						 &watch->rules);
+					list_del(&entry->rule.rlist);
+				}
 				list_replace_rcu(&entry->list, &nentry->list);
 			}
 			call_rcu(&entry->rcu, audit_free_rule_rcu);
 		}
 	}
 
-	mutex_unlock(&audit_netlink_mutex);
+	mutex_unlock(&audit_filter_mutex);
 
 	return err;
 }
+
+/* Update watch data in audit rules based on inotify events. */
+void audit_handle_ievent(struct inotify_watch *i_watch, u32 wd, u32 mask,
+			 u32 cookie, const char *dname, struct inode *inode)
+{
+	struct audit_parent *parent;
+
+	parent = container_of(i_watch, struct audit_parent, wdata);
+
+	if (mask & (IN_CREATE|IN_MOVED_TO) && inode)
+		audit_update_watch(parent, dname, inode->i_sb->s_dev,
+				   inode->i_ino, 0);
+	else if (mask & (IN_DELETE|IN_MOVED_FROM))
+		audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1);
+	/* inotify automatically removes the watch and sends IN_IGNORED */
+	else if (mask & (IN_DELETE_SELF|IN_UNMOUNT))
+		audit_remove_parent_watches(parent);
+	/* inotify does not remove the watch, so remove it manually */
+	else if(mask & IN_MOVE_SELF) {
+		audit_remove_parent_watches(parent);
+		inotify_remove_watch_locked(audit_ih, i_watch);
+	} else if (mask & IN_IGNORED)
+		put_inotify_watch(i_watch);
+}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 14e295a4121b..174a3f624892 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -200,12 +200,13 @@ struct audit_context {
 #endif
 };
 
-
+/* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise. */
 static int audit_filter_rules(struct task_struct *tsk,
 			      struct audit_krule *rule,
 			      struct audit_context *ctx,
+			      struct audit_names *name,
 			      enum audit_state *state)
 {
 	int i, j, need_sid = 1;
@@ -268,7 +269,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_DEVMAJOR:
-			if (ctx) {
+			if (name)
+				result = audit_comparator(MAJOR(name->dev),
+							  f->op, f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
 					if (audit_comparator(MAJOR(ctx->names[j].dev),	f->op, f->val)) {
 						++result;
@@ -278,7 +282,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_DEVMINOR:
-			if (ctx) {
+			if (name)
+				result = audit_comparator(MINOR(name->dev),
+							  f->op, f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
 					if (audit_comparator(MINOR(ctx->names[j].dev), f->op, f->val)) {
 						++result;
@@ -288,7 +295,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			}
 			break;
 		case AUDIT_INODE:
-			if (ctx) {
+			if (name)
+				result = (name->ino == f->val ||
+					  name->pino == f->val);
+			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
 					if (audit_comparator(ctx->names[j].ino, f->op, f->val) ||
 					    audit_comparator(ctx->names[j].pino, f->op, f->val)) {
@@ -298,6 +308,12 @@ static int audit_filter_rules(struct task_struct *tsk,
 				}
 			}
 			break;
+		case AUDIT_WATCH:
+			if (name && rule->watch->ino != (unsigned long)-1)
+				result = (name->dev == rule->watch->dev &&
+					  (name->ino == rule->watch->ino ||
+					   name->pino == rule->watch->ino));
+			break;
 		case AUDIT_LOGINUID:
 			result = 0;
 			if (ctx)
@@ -354,7 +370,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) {
-		if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+		if (audit_filter_rules(tsk, &e->rule, NULL, NULL, &state)) {
 			rcu_read_unlock();
 			return state;
 		}
@@ -384,8 +400,9 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 		int bit  = AUDIT_BIT(ctx->major);
 
 		list_for_each_entry_rcu(e, list, list) {
-			if ((e->rule.mask[word] & bit) == bit
-					&& audit_filter_rules(tsk, &e->rule, ctx, &state)) {
+			if ((e->rule.mask[word] & bit) == bit &&
+			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
+					       &state)) {
 				rcu_read_unlock();
 				return state;
 			}
@@ -395,6 +412,49 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 	return AUDIT_BUILD_CONTEXT;
 }
 
+/* At syscall exit time, this filter is called if any audit_names[] have been
+ * collected during syscall processing.  We only check rules in sublists at hash
+ * buckets applicable to the inode numbers in audit_names[].
+ * Regarding audit_state, same rules apply as for audit_filter_syscall().
+ */
+enum audit_state audit_filter_inodes(struct task_struct *tsk,
+				     struct audit_context *ctx)
+{
+	int i;
+	struct audit_entry *e;
+	enum audit_state state;
+
+	if (audit_pid && tsk->tgid == audit_pid)
+		return AUDIT_DISABLED;
+
+	rcu_read_lock();
+	for (i = 0; i < ctx->name_count; i++) {
+		int word = AUDIT_WORD(ctx->major);
+		int bit  = AUDIT_BIT(ctx->major);
+		struct audit_names *n = &ctx->names[i];
+		int h = audit_hash_ino((u32)n->ino);
+		struct list_head *list = &audit_inode_hash[h];
+
+		if (list_empty(list))
+			continue;
+
+		list_for_each_entry_rcu(e, list, list) {
+			if ((e->rule.mask[word] & bit) == bit &&
+			    audit_filter_rules(tsk, &e->rule, ctx, n, &state)) {
+				rcu_read_unlock();
+				return state;
+			}
+		}
+	}
+	rcu_read_unlock();
+	return AUDIT_BUILD_CONTEXT;
+}
+
+void audit_set_auditable(struct audit_context *ctx)
+{
+	ctx->auditable = 1;
+}
+
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 						      int return_valid,
 						      int return_code)
@@ -408,11 +468,20 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 
 	if (context->in_syscall && !context->auditable) {
 		enum audit_state state;
+
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
+		if (state == AUDIT_RECORD_CONTEXT) {
+			context->auditable = 1;
+			goto get_context;
+		}
+
+		state = audit_filter_inodes(tsk, context);
 		if (state == AUDIT_RECORD_CONTEXT)
 			context->auditable = 1;
+
 	}
 
+get_context:
 	context->pid = tsk->pid;
 	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
 	context->uid = tsk->uid;
@@ -1142,37 +1211,20 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 		return;
 
 	/* determine matching parent */
-	if (dname)
-		for (idx = 0; idx < context->name_count; idx++)
-			if (context->names[idx].pino == pino) {
-				const char *n;
-				const char *name = context->names[idx].name;
-				int dlen = strlen(dname);
-				int nlen = name ? strlen(name) : 0;
-
-				if (nlen < dlen)
-					continue;
-				
-				/* disregard trailing slashes */
-				n = name + nlen - 1;
-				while ((*n == '/') && (n > name))
-					n--;
-
-				/* find last path component */
-				n = n - dlen + 1;
-				if (n < name)
-					continue;
-				else if (n > name) {
-					if (*--n != '/')
-						continue;
-					else
-						n++;
-				}
+	if (!dname)
+		goto no_match;
+	for (idx = 0; idx < context->name_count; idx++)
+		if (context->names[idx].pino == pino) {
+			const char *name = context->names[idx].name;
 
-				if (strncmp(n, dname, dlen) == 0)
-					goto update_context;
-			}
+			if (!name)
+				continue;
+
+			if (audit_compare_dname_path(dname, name) == 0)
+				goto update_context;
+		}
 
+no_match:
 	/* catch-all in case match not found */
 	idx = context->name_count++;
 	context->names[idx].name  = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 9c937dcc71021f2dbf78f904f03d962dd9bcc130 Mon Sep 17 00:00:00 2001
From: Amy Griffis <amy.griffis@hp.com>
Date: Thu, 8 Jun 2006 23:19:31 -0400
Subject: [PATCH] log more info for directory entry change events

When an audit event involves changes to a directory entry, include
a PATH record for the directory itself.  A few other notable changes:

    - fixed audit_inode_child() hooks in fsnotify_move()
    - removed unused flags arg from audit_inode()
    - added audit log routines for logging a portion of a string

Here's some sample output.

before patch:
type=SYSCALL msg=audit(1149821605.320:26): arch=40000003 syscall=39 success=yes exit=0 a0=bf8d3c7c a1=1ff a2=804e1b8 a3=bf8d3c7c items=1 ppid=739 pid=800 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 comm="mkdir" exe="/bin/mkdir" subj=root:system_r:unconfined_t:s0-s0:c0.c255
type=CWD msg=audit(1149821605.320:26):  cwd="/root"
type=PATH msg=audit(1149821605.320:26): item=0 name="foo" parent=164068 inode=164010 dev=03:00 mode=040755 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_t:s0

after patch:
type=SYSCALL msg=audit(1149822032.332:24): arch=40000003 syscall=39 success=yes exit=0 a0=bfdd9c7c a1=1ff a2=804e1b8 a3=bfdd9c7c items=2 ppid=714 pid=777 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 comm="mkdir" exe="/bin/mkdir" subj=root:system_r:unconfined_t:s0-s0:c0.c255
type=CWD msg=audit(1149822032.332:24):  cwd="/root"
type=PATH msg=audit(1149822032.332:24): item=0 name="/root" inode=164068 dev=03:00 mode=040750 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_dir_t:s0
type=PATH msg=audit(1149822032.332:24): item=1 name="foo" inode=164010 dev=03:00 mode=040755 ouid=0 ogid=0 rdev=00:00 obj=root:object_r:user_home_t:s0

Signed-off-by: Amy Griffis <amy.griffis@hp.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c               |   2 +-
 fs/open.c                |   4 +-
 fs/xattr.c               |   4 +-
 include/linux/audit.h    |  15 +++---
 include/linux/fsnotify.h |   3 +-
 kernel/audit.c           |  54 +++++++++++++++++++--
 kernel/audit.h           |   3 +-
 kernel/auditfilter.c     |   8 ++-
 kernel/auditsc.c         | 123 ++++++++++++++++++++++++++---------------------
 9 files changed, 142 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index d6e2ee251736..184fe4acf824 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1127,7 +1127,7 @@ out:
 	if (likely(retval == 0)) {
 		if (unlikely(current->audit_context && nd && nd->dentry &&
 				nd->dentry->d_inode))
-		audit_inode(name, nd->dentry->d_inode, flags);
+		audit_inode(name, nd->dentry->d_inode);
 	}
 out_fail:
 	return retval;
diff --git a/fs/open.c b/fs/open.c
index 317b7c7f38a7..4f178acd4c09 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -633,7 +633,7 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
 	dentry = file->f_dentry;
 	inode = dentry->d_inode;
 
-	audit_inode(NULL, inode, 0);
+	audit_inode(NULL, inode);
 
 	err = -EROFS;
 	if (IS_RDONLY(inode))
@@ -786,7 +786,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 	if (file) {
 		struct dentry * dentry;
 		dentry = file->f_dentry;
-		audit_inode(NULL, dentry->d_inode, 0);
+		audit_inode(NULL, dentry->d_inode);
 		error = chown_common(dentry, user, group);
 		fput(file);
 	}
diff --git a/fs/xattr.c b/fs/xattr.c
index e416190f5e9c..c32f15b5f60f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -242,7 +242,7 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
 	if (!f)
 		return error;
 	dentry = f->f_dentry;
-	audit_inode(NULL, dentry->d_inode, 0);
+	audit_inode(NULL, dentry->d_inode);
 	error = setxattr(dentry, name, value, size, flags);
 	fput(f);
 	return error;
@@ -469,7 +469,7 @@ sys_fremovexattr(int fd, char __user *name)
 	if (!f)
 		return error;
 	dentry = f->f_dentry;
-	audit_inode(NULL, dentry->d_inode, 0);
+	audit_inode(NULL, dentry->d_inode);
 	error = removexattr(dentry, name);
 	fput(f);
 	return error;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index c78327507f4e..e1c1dbdf9efb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -310,7 +310,7 @@ extern void audit_syscall_entry(int arch,
 extern void audit_syscall_exit(int failed, long return_code);
 extern void __audit_getname(const char *name);
 extern void audit_putname(const char *name);
-extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags);
+extern void __audit_inode(const char *name, const struct inode *inode);
 extern void __audit_inode_child(const char *dname, const struct inode *inode,
 				unsigned long pino);
 static inline void audit_getname(const char *name)
@@ -318,10 +318,9 @@ static inline void audit_getname(const char *name)
 	if (unlikely(current->audit_context))
 		__audit_getname(name);
 }
-static inline void audit_inode(const char *name, const struct inode *inode,
-			       unsigned flags) {
+static inline void audit_inode(const char *name, const struct inode *inode) {
 	if (unlikely(current->audit_context))
-		__audit_inode(name, inode, flags);
+		__audit_inode(name, inode);
 }
 static inline void audit_inode_child(const char *dname, 
 				     const struct inode *inode, 
@@ -398,9 +397,9 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 #define audit_syscall_exit(f,r) do { ; } while (0)
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
-#define __audit_inode(n,i,f) do { ; } while (0)
+#define __audit_inode(n,i) do { ; } while (0)
 #define __audit_inode_child(d,i,p) do { ; } while (0)
-#define audit_inode(n,i,f) do { ; } while (0)
+#define audit_inode(n,i) do { ; } while (0)
 #define audit_inode_child(d,i,p) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
 #define audit_get_loginuid(c) ({ -1; })
@@ -435,6 +434,9 @@ extern void		    audit_log_hex(struct audit_buffer *ab,
 					  size_t len);
 extern const char *	    audit_log_untrustedstring(struct audit_buffer *ab,
 						      const char *string);
+extern const char *	    audit_log_n_untrustedstring(struct audit_buffer *ab,
+							size_t n,
+							const char *string);
 extern void		    audit_log_d_path(struct audit_buffer *ab,
 					     const char *prefix,
 					     struct dentry *dentry,
@@ -452,6 +454,7 @@ extern int  audit_receive_filter(int type, int pid, int uid, int seq,
 #define audit_log_end(b) do { ; } while (0)
 #define audit_log_hex(a,b,l) do { ; } while (0)
 #define audit_log_untrustedstring(a,s) do { ; } while (0)
+#define audit_log_n_untrustedstring(a,n,s) do { ; } while (0)
 #define audit_log_d_path(b,p,d,v) do { ; } while (0)
 #endif
 #endif
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index a9d30442448f..cc5dec70c32c 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -67,8 +67,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	if (source) {
 		inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
 	}
-	audit_inode_child(old_name, source, old_dir->i_ino);
-	audit_inode_child(new_name, target, new_dir->i_ino);
+	audit_inode_child(new_name, source, new_dir->i_ino);
 }
 
 /*
diff --git a/kernel/audit.c b/kernel/audit.c
index 0fbf1c116363..7dfac7031bd7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1051,20 +1051,53 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 	skb_put(skb, len << 1); /* new string is twice the old string */
 }
 
+/*
+ * Format a string of no more than slen characters into the audit buffer,
+ * enclosed in quote marks.
+ */
+static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
+			       const char *string)
+{
+	int avail, new_len;
+	unsigned char *ptr;
+	struct sk_buff *skb;
+
+	BUG_ON(!ab->skb);
+	skb = ab->skb;
+	avail = skb_tailroom(skb);
+	new_len = slen + 3;	/* enclosing quotes + null terminator */
+	if (new_len > avail) {
+		avail = audit_expand(ab, new_len);
+		if (!avail)
+			return;
+	}
+	ptr = skb->tail;
+	*ptr++ = '"';
+	memcpy(ptr, string, slen);
+	ptr += slen;
+	*ptr++ = '"';
+	*ptr = 0;
+	skb_put(skb, slen + 2);	/* don't include null terminator */
+}
+
 /**
- * audit_log_unstrustedstring - log a string that may contain random characters
+ * audit_log_n_untrustedstring - log a string that may contain random characters
  * @ab: audit_buffer
+ * @len: length of string (not including trailing null)
  * @string: string to be logged
  *
  * This code will escape a string that is passed to it if the string
  * contains a control character, unprintable character, double quote mark,
  * or a space. Unescaped strings will start and end with a double quote mark.
  * Strings that are escaped are printed in hex (2 digits per char).
+ *
+ * The caller specifies the number of characters in the string to log, which may
+ * or may not be the entire string.
  */
-const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+const char *audit_log_n_untrustedstring(struct audit_buffer *ab, size_t len,
+					const char *string)
 {
 	const unsigned char *p = string;
-	size_t len = strlen(string);
 
 	while (*p) {
 		if (*p == '"' || *p < 0x21 || *p > 0x7f) {
@@ -1073,10 +1106,23 @@ const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *strin
 		}
 		p++;
 	}
-	audit_log_format(ab, "\"%s\"", string);
+	audit_log_n_string(ab, len, string);
 	return p + 1;
 }
 
+/**
+ * audit_log_untrustedstring - log a string that may contain random characters
+ * @ab: audit_buffer
+ * @string: string to be logged
+ *
+ * Same as audit_log_n_untrustedstring(), except that strlen is used to
+ * determine string length.
+ */
+const char *audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+{
+	return audit_log_n_untrustedstring(ab, strlen(string), string);
+}
+
 /* This is a helper-function to print the escaped d_path */
 void audit_log_d_path(struct audit_buffer *ab, const char *prefix,
 		      struct dentry *dentry, struct vfsmount *vfsmnt)
diff --git a/kernel/audit.h b/kernel/audit.h
index 58fa44cb8d01..8323e4132a33 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -104,7 +104,8 @@ static inline int audit_hash_ino(u32 ino)
 }
 
 extern int audit_comparator(const u32 left, const u32 op, const u32 right);
-extern int audit_compare_dname_path(const char *dname, const char *path);
+extern int audit_compare_dname_path(const char *dname, const char *path,
+				    int *dirlen);
 extern struct sk_buff *	    audit_make_reply(int pid, int seq, int type,
 					     int done, int multi,
 					     void *payload, int size);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a536f7148bcd..4c99d2c586ed 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -787,7 +787,7 @@ static void audit_update_watch(struct audit_parent *parent,
 
 	mutex_lock(&audit_filter_mutex);
 	list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
-		if (audit_compare_dname_path(dname, owatch->path))
+		if (audit_compare_dname_path(dname, owatch->path, NULL))
 			continue;
 
 		/* If the update involves invalidating rules, do the inode-based
@@ -1387,7 +1387,8 @@ int audit_comparator(const u32 left, const u32 op, const u32 right)
 
 /* Compare given dentry name with last component in given path,
  * return of 0 indicates a match. */
-int audit_compare_dname_path(const char *dname, const char *path)
+int audit_compare_dname_path(const char *dname, const char *path,
+			     int *dirlen)
 {
 	int dlen, plen;
 	const char *p;
@@ -1416,6 +1417,9 @@ int audit_compare_dname_path(const char *dname, const char *path)
 			p++;
 	}
 
+	/* return length of path's directory component */
+	if (dirlen)
+		*dirlen = p - path;
 	return strncmp(p, dname, dlen);
 }
 
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 174a3f624892..851ae0217e4b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -82,6 +82,9 @@ extern int audit_enabled;
  * path_lookup. */
 #define AUDIT_NAMES_RESERVED 7
 
+/* Indicates that audit should log the full pathname. */
+#define AUDIT_NAME_FULL -1
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -89,8 +92,9 @@ extern int audit_enabled;
  * Further, in fs/namei.c:path_lookup() we store the inode and device. */
 struct audit_names {
 	const char	*name;
+	int		name_len;	/* number of name's characters to log */
+	unsigned	name_put;	/* call __putname() for this name */
 	unsigned long	ino;
-	unsigned long	pino;
 	dev_t		dev;
 	umode_t		mode;
 	uid_t		uid;
@@ -296,12 +300,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_INODE:
 			if (name)
-				result = (name->ino == f->val ||
-					  name->pino == f->val);
+				result = (name->ino == f->val);
 			else if (ctx) {
 				for (j = 0; j < ctx->name_count; j++) {
-					if (audit_comparator(ctx->names[j].ino, f->op, f->val) ||
-					    audit_comparator(ctx->names[j].pino, f->op, f->val)) {
+					if (audit_comparator(ctx->names[j].ino, f->op, f->val)) {
 						++result;
 						break;
 					}
@@ -311,8 +313,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_WATCH:
 			if (name && rule->watch->ino != (unsigned long)-1)
 				result = (name->dev == rule->watch->dev &&
-					  (name->ino == rule->watch->ino ||
-					   name->pino == rule->watch->ino));
+					  name->ino == rule->watch->ino);
 			break;
 		case AUDIT_LOGINUID:
 			result = 0;
@@ -526,7 +527,7 @@ static inline void audit_free_names(struct audit_context *context)
 #endif
 
 	for (i = 0; i < context->name_count; i++) {
-		if (context->names[i].name)
+		if (context->names[i].name && context->names[i].name_put)
 			__putname(context->names[i].name);
 	}
 	context->name_count = 0;
@@ -850,8 +851,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		unsigned long ino  = context->names[i].ino;
-		unsigned long pino = context->names[i].pino;
+		struct audit_names *n = &context->names[i];
 
 		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
 		if (!ab)
@@ -859,33 +859,47 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 
 		audit_log_format(ab, "item=%d", i);
 
-		audit_log_format(ab, " name=");
-		if (context->names[i].name)
-			audit_log_untrustedstring(ab, context->names[i].name);
-		else
-			audit_log_format(ab, "(null)");
-
-		if (pino != (unsigned long)-1)
-			audit_log_format(ab, " parent=%lu",  pino);
-		if (ino != (unsigned long)-1)
-			audit_log_format(ab, " inode=%lu",  ino);
-		if ((pino != (unsigned long)-1) || (ino != (unsigned long)-1))
-			audit_log_format(ab, " dev=%02x:%02x mode=%#o" 
-					 " ouid=%u ogid=%u rdev=%02x:%02x", 
-					 MAJOR(context->names[i].dev), 
-					 MINOR(context->names[i].dev), 
-					 context->names[i].mode, 
-					 context->names[i].uid, 
-					 context->names[i].gid, 
-					 MAJOR(context->names[i].rdev), 
-					 MINOR(context->names[i].rdev));
-		if (context->names[i].osid != 0) {
+		if (n->name) {
+			switch(n->name_len) {
+			case AUDIT_NAME_FULL:
+				/* log the full path */
+				audit_log_format(ab, " name=");
+				audit_log_untrustedstring(ab, n->name);
+				break;
+			case 0:
+				/* name was specified as a relative path and the
+				 * directory component is the cwd */
+				audit_log_d_path(ab, " name=", context->pwd,
+						 context->pwdmnt);
+				break;
+			default:
+				/* log the name's directory component */
+				audit_log_format(ab, " name=");
+				audit_log_n_untrustedstring(ab, n->name_len,
+							    n->name);
+			}
+		} else
+			audit_log_format(ab, " name=(null)");
+
+		if (n->ino != (unsigned long)-1) {
+			audit_log_format(ab, " inode=%lu"
+					 " dev=%02x:%02x mode=%#o"
+					 " ouid=%u ogid=%u rdev=%02x:%02x",
+					 n->ino,
+					 MAJOR(n->dev),
+					 MINOR(n->dev),
+					 n->mode,
+					 n->uid,
+					 n->gid,
+					 MAJOR(n->rdev),
+					 MINOR(n->rdev));
+		}
+		if (n->osid != 0) {
 			char *ctx = NULL;
 			u32 len;
 			if (selinux_ctxid_to_string(
-				context->names[i].osid, &ctx, &len)) {
-				audit_log_format(ab, " osid=%u",
-						context->names[i].osid);
+				n->osid, &ctx, &len)) {
+				audit_log_format(ab, " osid=%u", n->osid);
 				call_panic = 2;
 			} else
 				audit_log_format(ab, " obj=%s", ctx);
@@ -1075,6 +1089,8 @@ void __audit_getname(const char *name)
 	}
 	BUG_ON(context->name_count >= AUDIT_NAMES);
 	context->names[context->name_count].name = name;
+	context->names[context->name_count].name_len = AUDIT_NAME_FULL;
+	context->names[context->name_count].name_put = 1;
 	context->names[context->name_count].ino  = (unsigned long)-1;
 	++context->name_count;
 	if (!context->pwd) {
@@ -1141,11 +1157,10 @@ static void audit_inode_context(int idx, const struct inode *inode)
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
  * @inode: inode being audited
- * @flags: lookup flags (as used in path_lookup())
  *
  * Called from fs/namei.c:path_lookup().
  */
-void __audit_inode(const char *name, const struct inode *inode, unsigned flags)
+void __audit_inode(const char *name, const struct inode *inode)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1171,20 +1186,13 @@ void __audit_inode(const char *name, const struct inode *inode, unsigned flags)
 		++context->ino_count;
 #endif
 	}
+	context->names[idx].ino   = inode->i_ino;
 	context->names[idx].dev	  = inode->i_sb->s_dev;
 	context->names[idx].mode  = inode->i_mode;
 	context->names[idx].uid   = inode->i_uid;
 	context->names[idx].gid   = inode->i_gid;
 	context->names[idx].rdev  = inode->i_rdev;
 	audit_inode_context(idx, inode);
-	if ((flags & LOOKUP_PARENT) && (strcmp(name, "/") != 0) && 
-	    (strcmp(name, ".") != 0)) {
-		context->names[idx].ino   = (unsigned long)-1;
-		context->names[idx].pino  = inode->i_ino;
-	} else {
-		context->names[idx].ino   = inode->i_ino;
-		context->names[idx].pino  = (unsigned long)-1;
-	}
 }
 
 /**
@@ -1206,34 +1214,40 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
+	const char *found_name = NULL;
+	int dirlen = 0;
 
 	if (!context->in_syscall)
 		return;
 
 	/* determine matching parent */
 	if (!dname)
-		goto no_match;
+		goto update_context;
 	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].pino == pino) {
+		if (context->names[idx].ino == pino) {
 			const char *name = context->names[idx].name;
 
 			if (!name)
 				continue;
 
-			if (audit_compare_dname_path(dname, name) == 0)
-				goto update_context;
+			if (audit_compare_dname_path(dname, name, &dirlen) == 0) {
+				context->names[idx].name_len = dirlen;
+				found_name = name;
+				break;
+			}
 		}
 
-no_match:
-	/* catch-all in case match not found */
+update_context:
 	idx = context->name_count++;
-	context->names[idx].name  = NULL;
-	context->names[idx].pino  = pino;
 #if AUDIT_DEBUG
 	context->ino_count++;
 #endif
+	/* Re-use the name belonging to the slot for a matching parent directory.
+	 * All names for this context are relinquished in audit_free_names() */
+	context->names[idx].name = found_name;
+	context->names[idx].name_len = AUDIT_NAME_FULL;
+	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-update_context:
 	if (inode) {
 		context->names[idx].ino   = inode->i_ino;
 		context->names[idx].dev	  = inode->i_sb->s_dev;
@@ -1242,7 +1256,8 @@ update_context:
 		context->names[idx].gid   = inode->i_gid;
 		context->names[idx].rdev  = inode->i_rdev;
 		audit_inode_context(idx, inode);
-	}
+	} else
+		context->names[idx].ino   = (unsigned long)-1;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b