about summary refs log tree commit diff stats
path: root/fs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/file.c')
-rw-r--r-- fs/file.c 145
1 files changed, 74 insertions, 71 deletions
diff --git a/fs/file.c b/fs/file.c
index 97d212a9b814..5f9c802a5d8d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -87,6 +87,21 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
}
+/*
+ * Note how the fdtable bitmap allocations very much have to be a multiple of
+ * BITS_PER_LONG. This is not only because we walk those things in chunks of
+ * 'unsigned long' in some places, but simply because that is how the Linux
+ * kernel bitmaps are defined to work: they are not "bits in an array of bytes",
+ * they are very much "bits in an array of unsigned long".
+ *
+ * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
+ * by that "1024/sizeof(ptr)" before, we already know there are sufficient
+ * clear low bits. Clang seems to realize that, gcc ends up being confused.
+ *
+ * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
+ * let's consider it documentation (and maybe a test-case for gcc to improve
+ * its code generation ;)
+ */
static struct fdtable * alloc_fdtable(unsigned int nr)
{
struct fdtable *fdt;
@@ -102,6 +117,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
+ nr = ALIGN(nr, BITS_PER_LONG);
/*
* Note that this can drive nr *below* what we had passed if sysctl_nr_open
* had been set lower between the check in expand_files() and here. Deal
@@ -269,6 +285,19 @@ static unsigned int count_open_files(struct fdtable *fdt)
return i;
}
+/*
+ * Note that a sane fdtable size always has to be a multiple of
+ * BITS_PER_LONG, since we have bitmaps that are sized by this.
+ *
+ * 'max_fds' will normally already be properly aligned, but it
+ * turns out that in the close_range() -> __close_range() ->
+ * unshare_fd() -> dup_fd() -> sane_fdtable_size() path we can end
+ * up having a 'max_fds' value that isn't already aligned.
+ *
+ * Rather than make close_range() have to worry about this,
+ * just make that BITS_PER_LONG alignment be part of a sane
+ * fdtable size. Because that's really what it is.
+ */
static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
{
unsigned int count;
@@ -276,7 +305,7 @@ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
count = count_open_files(fdt);
if (max_fds < NR_OPEN_DEFAULT)
max_fds = NR_OPEN_DEFAULT;
- return min(count, max_fds);
+ return ALIGN(min(count, max_fds), BITS_PER_LONG);
}
/*
@@ -601,32 +630,23 @@ EXPORT_SYMBOL(fd_install);
* @files: file struct to retrieve file from
* @fd: file descriptor to retrieve file for
*
- * If this functions returns an EINVAL error pointer the fd was beyond the
- * current maximum number of file descriptors for that fdtable.
+ * Context: files->file_lock must be held.
*
- * Returns: The file associated with @fd, on error returns an error pointer.
+ * Returns: The file associated with @fd (NULL if @fd is not open)
*/
static struct file *pick_file(struct files_struct *files, unsigned fd)
{
+ struct fdtable *fdt = files_fdtable(files);
struct file *file;
- struct fdtable *fdt;
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (fd >= fdt->max_fds) {
- file = ERR_PTR(-EINVAL);
- goto out_unlock;
- }
+ if (fd >= fdt->max_fds)
+ return NULL;
+
file = fdt->fd[fd];
- if (!file) {
- file = ERR_PTR(-EBADF);
- goto out_unlock;
+ if (file) {
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ __put_unused_fd(files, fd);
}
- rcu_assign_pointer(fdt->fd[fd], NULL);
- __put_unused_fd(files, fd);
-
-out_unlock:
- spin_unlock(&files->file_lock);
return file;
}
@@ -635,8 +655,10 @@ int close_fd(unsigned fd)
struct files_struct *files = current->files;
struct file *file;
+ spin_lock(&files->file_lock);
file = pick_file(files, fd);
- if (IS_ERR(file))
+ spin_unlock(&files->file_lock);
+ if (!file)
return -EBADF;
return filp_close(file, files);
@@ -673,20 +695,25 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
unsigned int max_fd)
{
+ unsigned n;
+
+ rcu_read_lock();
+ n = last_fd(files_fdtable(cur_fds));
+ rcu_read_unlock();
+ max_fd = min(max_fd, n);
+
while (fd <= max_fd) {
struct file *file;
+ spin_lock(&cur_fds->file_lock);
file = pick_file(cur_fds, fd++);
- if (!IS_ERR(file)) {
+ spin_unlock(&cur_fds->file_lock);
+
+ if (file) {
/* found a valid file to close */
filp_close(file, cur_fds);
cond_resched();
- continue;
}
-
- /* beyond the last fd in that table */
- if (PTR_ERR(file) == -EINVAL)
- return;
}
}
@@ -766,43 +793,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
* See close_fd_get_file() below, this variant assumes current->files->file_lock
* is held.
*/
-int __close_fd_get_file(unsigned int fd, struct file **res)
+struct file *__close_fd_get_file(unsigned int fd)
{
- struct files_struct *files = current->files;
- struct file *file;
- struct fdtable *fdt;
-
- fdt = files_fdtable(files);
- if (fd >= fdt->max_fds)
- goto out_err;
- file = fdt->fd[fd];
- if (!file)
- goto out_err;
- rcu_assign_pointer(fdt->fd[fd], NULL);
- __put_unused_fd(files, fd);
- get_file(file);
- *res = file;
- return 0;
-out_err:
- *res = NULL;
- return -ENOENT;
+ return pick_file(current->files, fd);
}
/*
* variant of close_fd that gets a ref on the file for later fput.
- * The caller must ensure that filp_close() called on the file, and then
- * an fput().
+ * The caller must ensure that filp_close() is called on the file.
*/
-int close_fd_get_file(unsigned int fd, struct file **res)
+struct file *close_fd_get_file(unsigned int fd)
{
struct files_struct *files = current->files;
- int ret;
+ struct file *file;
spin_lock(&files->file_lock);
- ret = __close_fd_get_file(fd, res);
+ file = pick_file(files, fd);
spin_unlock(&files->file_lock);
- return ret;
+ return file;
}
void do_close_on_exec(struct files_struct *files)
@@ -842,7 +851,7 @@ void do_close_on_exec(struct files_struct *files)
}
static inline struct file *__fget_files_rcu(struct files_struct *files,
- unsigned int fd, fmode_t mask, unsigned int refs)
+ unsigned int fd, fmode_t mask)
{
for (;;) {
struct file *file;
@@ -868,10 +877,9 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
* Such a race can take two forms:
*
* (a) the file ref already went down to zero,
- * and get_file_rcu_many() fails. Just try
- * again:
+ * and get_file_rcu() fails. Just try again:
*/
- if (unlikely(!get_file_rcu_many(file, refs)))
+ if (unlikely(!get_file_rcu(file)))
continue;
/*
@@ -880,11 +888,11 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
* pointer having changed, because it always goes
* hand-in-hand with 'fdt'.
*
- * If so, we need to put our refs and try again.
+ * If so, we need to put our ref and try again.
*/
if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
unlikely(rcu_dereference_raw(*fdentry) != file)) {
- fput_many(file, refs);
+ fput(file);
continue;
}
@@ -897,37 +905,31 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
}
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
- fmode_t mask, unsigned int refs)
+ fmode_t mask)
{
struct file *file;
rcu_read_lock();
- file = __fget_files_rcu(files, fd, mask, refs);
+ file = __fget_files_rcu(files, fd, mask);
rcu_read_unlock();
return file;
}
-static inline struct file *__fget(unsigned int fd, fmode_t mask,
- unsigned int refs)
-{
- return __fget_files(current->files, fd, mask, refs);
-}
-
-struct file *fget_many(unsigned int fd, unsigned int refs)
+static inline struct file *__fget(unsigned int fd, fmode_t mask)
{
- return __fget(fd, FMODE_PATH, refs);
+ return __fget_files(current->files, fd, mask);
}
struct file *fget(unsigned int fd)
{
- return __fget(fd, FMODE_PATH, 1);
+ return __fget(fd, FMODE_PATH);
}
EXPORT_SYMBOL(fget);
struct file *fget_raw(unsigned int fd)
{
- return __fget(fd, 0, 1);
+ return __fget(fd, 0);
}
EXPORT_SYMBOL(fget_raw);
@@ -937,7 +939,7 @@ struct file *fget_task(struct task_struct *task, unsigned int fd)
task_lock(task);
if (task->files)
- file = __fget_files(task->files, fd, 0, 1);
+ file = __fget_files(task->files, fd, 0);
task_unlock(task);
return file;
@@ -978,6 +980,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
*ret_fd = fd;
return file;
}
+EXPORT_SYMBOL(task_lookup_next_fd_rcu);
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
@@ -1006,7 +1009,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
return 0;
return (unsigned long)file;
} else {
- file = __fget(fd, mask, 1);
+ file = __fget(fd, mask);
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;