aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/aio.c46
-rw-r--r--fs/autofs4/autofs_i.h12
-rw-r--r--fs/autofs4/dev-ioctl.c3
-rw-r--r--fs/autofs4/inode.c4
-rw-r--r--fs/autofs4/waitq.c22
-rw-r--r--fs/binfmt_aout.c32
-rw-r--r--fs/binfmt_elf.c23
-rw-r--r--fs/binfmt_elf_fdpic.c18
-rw-r--r--fs/binfmt_flat.c12
-rw-r--r--fs/binfmt_som.c12
-rw-r--r--fs/bio.c7
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/btrfs/backref.c27
-rw-r--r--fs/btrfs/ctree.c28
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c40
-rw-r--r--fs/btrfs/disk-io.h3
-rw-r--r--fs/btrfs/extent-tree.c17
-rw-r--r--fs/btrfs/extent_io.c60
-rw-r--r--fs/btrfs/extent_io.h4
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/inode.c54
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/btrfs/ioctl.h4
-rw-r--r--fs/btrfs/reada.c48
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/scrub.c22
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/buffer.c5
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifssmb.c6
-rw-r--r--fs/cifs/connect.c36
-rw-r--r--fs/cifs/dir.c17
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/dcache.c200
-rw-r--r--fs/dlm/lock.c12
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c10
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext4/namei.c5
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/gfs2/acl.c12
-rw-r--r--fs/gfs2/aops.c18
-rw-r--r--fs/gfs2/bmap.c10
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/file.c12
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/incore.h26
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/lock_dlm.c10
-rw-r--r--fs/gfs2/log.c103
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c520
-rw-r--r--fs/gfs2/lops.h14
-rw-r--r--fs/gfs2/main.c26
-rw-r--r--fs/gfs2/meta_io.c28
-rw-r--r--fs/gfs2/meta_io.h4
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/rgrp.c102
-rw-r--r--fs/gfs2/trace_gfs2.h16
-rw-r--r--fs/gfs2/trans.c44
-rw-r--r--fs/gfs2/util.c3
-rw-r--r--fs/gfs2/util.h3
-rw-r--r--fs/hfsplus/catalog.c4
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jffs2/gc.c2
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/namei.c81
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/client.c5
-rw-r--r--fs/nfs/dir.c9
-rw-r--r--fs/nfs/idmap.c4
-rw-r--r--fs/nfs/internal.h8
-rw-r--r--fs/nfs/namespace.c93
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4_fs.h11
-rw-r--r--fs/nfs/nfs4filelayoutdev.c2
-rw-r--r--fs/nfs/nfs4namespace.c86
-rw-r--r--fs/nfs/nfs4proc.c189
-rw-r--r--fs/nfs/nfs4state.c31
-rw-r--r--fs/nfs/nfs4xdr.c53
-rw-r--r--fs/nfs/objlayout/objlayout.c2
-rw-r--r--fs/nfs/pnfs.c2
-rw-r--r--fs/nfs/proc.c3
-rw-r--r--fs/nfs/read.c2
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/open.c2
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/proc/base.c63
-rw-r--r--fs/proc/task_mmu.c15
-rw-r--r--fs/pstore/Kconfig17
-rw-r--r--fs/pstore/Makefile3
-rw-r--r--fs/pstore/ram.c383
-rw-r--r--fs/pstore/ram_core.c532
-rw-r--r--fs/stat.c49
-rw-r--r--fs/sysfs/dir.c37
-rw-r--r--fs/ubifs/tnc.c2
-rw-r--r--fs/ubifs/xattr.c6
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/ufs/super.c5
113 files changed, 2431 insertions, 1200 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index e95d1b64082c..022574202749 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -33,7 +33,7 @@ config ARCH_BINFMT_ELF_RANDOMIZE_PIE
config BINFMT_ELF_FDPIC
bool "Kernel support for FDPIC ELF binaries"
default y
- depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
+ depends on (FRV || BLACKFIN || (SUPERH32 && !MMU) || C6X)
help
ELF FDPIC binaries are based on ELF, but allow the individual load
segments of a binary to be located in memory independently of each
diff --git a/fs/aio.c b/fs/aio.c
index da887604dfc5..e7f2fad7b4ce 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx)
put_page(info->ring_pages[i]);
if (info->mmap_size) {
- down_write(&ctx->mm->mmap_sem);
- do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
- up_write(&ctx->mm->mmap_sem);
+ BUG_ON(ctx->mm != current->mm);
+ vm_munmap(info->mmap_base, info->mmap_size);
}
if (info->ring_pages && info->ring_pages != info->internal_pages)
@@ -389,6 +388,17 @@ void exit_aio(struct mm_struct *mm)
"exit_aio:ioctx still alive: %d %d %d\n",
atomic_read(&ctx->users), ctx->dead,
ctx->reqs_active);
+ /*
+ * We don't need to bother with munmap() here -
+ * exit_mmap(mm) is coming and it'll unmap everything.
+ * Since aio_free_ring() uses non-zero ->mmap_size
+ * as indicator that it needs to unmap the area,
+ * just set it to 0; aio_free_ring() is the only
+ * place that uses ->mmap_size, so it's safe.
+ * That way we get all munmap done to current->mm -
+ * all other callers have ctx->mm == current->mm.
+ */
+ ctx->ring_info.mmap_size = 0;
put_ioctx(ctx);
}
}
@@ -1446,6 +1456,10 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
if (ret < 0)
goto out;
+ ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
+ if (ret < 0)
+ goto out;
+
kiocb->ki_nr_segs = kiocb->ki_nbytes;
kiocb->ki_cur_seg = 0;
/* ki_nbytes/left now reflect bytes instead of segs */
@@ -1457,11 +1471,17 @@ out:
return ret;
}
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
{
+ int bytes;
+
+ bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
+ if (bytes < 0)
+ return bytes;
+
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = kiocb->ki_left;
+ kiocb->ki_iovec->iov_len = bytes;
kiocb->ki_nr_segs = 1;
kiocb->ki_cur_seg = 0;
return 0;
@@ -1486,10 +1506,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(READ, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1504,10 +1521,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
kiocb->ki_left)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
- ret = aio_setup_single_vector(kiocb);
+ ret = aio_setup_single_vector(WRITE, file, kiocb);
if (ret)
break;
ret = -EINVAL;
@@ -1518,9 +1532,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
break;
- ret = security_file_permission(file, MAY_READ);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(READ, kiocb, compat);
if (ret)
break;
@@ -1532,9 +1543,6 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
break;
- ret = security_file_permission(file, MAY_WRITE);
- if (unlikely(ret))
- break;
ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
if (ret)
break;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index eb1cc92cd67d..908e18455413 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,7 +110,6 @@ struct autofs_sb_info {
int sub_version;
int min_proto;
int max_proto;
- int compat_daemon;
unsigned long exp_timeout;
unsigned int type;
int reghost_enabled;
@@ -270,6 +269,17 @@ int autofs4_fill_super(struct super_block *, void *, int);
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
void autofs4_clean_ino(struct autofs_info *);
+static inline int autofs_prepare_pipe(struct file *pipe)
+{
+ if (!pipe->f_op || !pipe->f_op->write)
+ return -EINVAL;
+ if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+ return -EINVAL;
+ /* We want a packet pipe */
+ pipe->f_flags |= O_DIRECT;
+ return 0;
+}
+
/* Queue management functions */
int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9dacb8586701..aa9103f8f01b 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
err = -EBADF;
goto out;
}
- if (!pipe->f_op || !pipe->f_op->write) {
+ if (autofs_prepare_pipe(pipe) < 0) {
err = -EPIPE;
fput(pipe);
goto out;
@@ -385,7 +385,6 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
sbi->pipefd = pipefd;
sbi->pipe = pipe;
sbi->catatonic = 0;
- sbi->compat_daemon = is_compat_task();
}
out:
mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index d8dc002e9cc3..6e488ebe7784 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,7 +19,6 @@
#include <linux/parser.h>
#include <linux/bitops.h>
#include <linux/magic.h>
-#include <linux/compat.h>
#include "autofs_i.h"
#include <linux/module.h>
@@ -225,7 +224,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
- sbi->compat_daemon = is_compat_task();
mutex_init(&sbi->wq_mutex);
mutex_init(&sbi->pipe_mutex);
spin_lock_init(&sbi->fs_lock);
@@ -292,7 +290,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
- if (!pipe->f_op || !pipe->f_op->write)
+ if (autofs_prepare_pipe(pipe) < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 9c098db43344..da8876d38a7b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,24 +91,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
return (bytes > 0);
}
-
-/*
- * The autofs_v5 packet was misdesigned.
- *
- * The packets are identical on x86-32 and x86-64, but have different
- * alignment. Which means that 'sizeof()' will give different results.
- * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
- */
-static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
-{
- size_t pktsz = sizeof(struct autofs_v5_packet);
-#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
- if (sbi->compat_daemon > 0)
- pktsz -= 4;
-#endif
- return pktsz;
-}
-
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
@@ -172,7 +155,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
{
struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
- pktsz = autofs_v5_packet_size(sbi);
+ pktsz = sizeof(*packet);
+
packet->wait_queue_token = wq->wait_queue_token;
packet->len = wq->name.len;
memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 2eb12f13593d..d146e181d10d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -50,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = PAGE_ALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -280,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
pos = 32;
map_size = ex.a_text+ex.a_data;
#endif
- down_write(&current->mm->mmap_sem);
- error = do_brk(text_addr & PAGE_MASK, map_size);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(text_addr & PAGE_MASK, map_size);
if (error != (text_addr & PAGE_MASK)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -313,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
loff_t pos = fd_offset;
- down_write(&current->mm->mmap_sem);
- do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- up_write(&current->mm->mmap_sem);
+ vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
bprm->file->f_op->read(bprm->file,
(char __user *)N_TXTADDR(ex),
ex.a_text+ex.a_data, &pos);
@@ -325,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
goto beyond_if;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+ error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset);
- up_write(&current->mm->mmap_sem);
if (error != N_TXTADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
}
- down_write(&current->mm->mmap_sem);
- error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+ error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
fd_offset + ex.a_text);
- up_write(&current->mm->mmap_sem);
if (error != N_DATADDR(ex)) {
send_sig(SIGKILL, current, 0);
return error;
@@ -412,9 +402,7 @@ static int load_aout_library(struct file *file)
"N_TXTOFF is not page aligned. Please convert library: %s\n",
file->f_path.dentry->d_name.name);
}
- down_write(&current->mm->mmap_sem);
- do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- up_write(&current->mm->mmap_sem);
+ vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
file->f_op->read(file, (char __user *)start_addr,
ex.a_text + ex.a_data, &pos);
@@ -425,12 +413,10 @@ static int load_aout_library(struct file *file)
goto out;
}
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+ error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
N_TXTOFF(ex));
- up_write(&current->mm->mmap_sem);
retval = error;
if (error != start_addr)
goto out;
@@ -438,9 +424,7 @@ static int load_aout_library(struct file *file)
len = PAGE_ALIGN(ex.a_text + ex.a_data);
bss = ex.a_text + ex.a_data + ex.a_bss;
if (bss > len) {
- down_write(&current->mm->mmap_sem);
- error = do_brk(start_addr + len, bss - len);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(start_addr + len, bss - len);
retval = error;
if (error != start_addr + len)
goto out;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 48ffb3dc610a..16f735417072 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -82,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end)
end = ELF_PAGEALIGN(end);
if (end > start) {
unsigned long addr;
- down_write(&current->mm->mmap_sem);
- addr = do_brk(start, end - start);
- up_write(&current->mm->mmap_sem);
+ addr = vm_brk(start, end - start);
if (BAD_ADDR(addr))
return addr;
}
@@ -514,9 +512,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
/* Map the last of the bss segment */
- down_write(&current->mm->mmap_sem);
- error = do_brk(elf_bss, last_bss - elf_bss);
- up_write(&current->mm->mmap_sem);
+ error = vm_brk(elf_bss, last_bss - elf_bss);
if (BAD_ADDR(error))
goto out_close;
}
@@ -962,10 +958,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
+ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
}
#ifdef ELF_PLAT_INIT
@@ -1050,8 +1044,7 @@ static int load_elf_library(struct file *file)
eppnt++;
/* Now use mmap to map the library into memory. */
- down_write(&current->mm->mmap_sem);
- error = do_mmap(file,
+ error = vm_mmap(file,
ELF_PAGESTART(eppnt->p_vaddr),
(eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)),
@@ -1059,7 +1052,6 @@ static int load_elf_library(struct file *file)
MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
(eppnt->p_offset -
ELF_PAGEOFFSET(eppnt->p_vaddr)));
- up_write(&current->mm->mmap_sem);
if (error != ELF_PAGESTART(eppnt->p_vaddr))
goto out_free_ph;
@@ -1072,11 +1064,8 @@ static int load_elf_library(struct file *file)
len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
ELF_MIN_ALIGN - 1);
bss = eppnt->p_memsz + eppnt->p_vaddr;
- if (bss > len) {
- down_write(&current->mm->mmap_sem);
- do_brk(len, bss - len);
- up_write(&current->mm->mmap_sem);
- }
+ if (bss > len)
+ vm_brk(len, bss - len);
error = 0;
out_free_ph:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9bd5612a8224..d390a0fffc65 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
(executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
stack_prot |= PROT_EXEC;
- down_write(&current->mm->mmap_sem);
- current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
+ current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
MAP_PRIVATE | MAP_ANONYMOUS |
MAP_UNINITIALIZED | MAP_GROWSDOWN,
0);
if (IS_ERR_VALUE(current->mm->start_brk)) {
- up_write(&current->mm->mmap_sem);
retval = current->mm->start_brk;
current->mm->start_brk = 0;
goto error_kill;
}
- up_write(&current->mm->mmap_sem);
-
current->mm->brk = current->mm->start_brk;
current->mm->context.end_brk = current->mm->start_brk;
current->mm->context.end_brk +=
@@ -955,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
mflags |= MAP_EXECUTABLE;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(NULL, load_addr, top - base,
+ maddr = vm_mmap(NULL, load_addr, top - base,
PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
- up_write(&mm->mmap_sem);
if (IS_ERR_VALUE(maddr))
return (int) maddr;
@@ -1096,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
/* create the mapping */
disp = phdr->p_vaddr & ~PAGE_MASK;
- down_write(&mm->mmap_sem);
- maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
+ maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
phdr->p_offset - disp);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
loop, phdr->p_memsz + disp, prot, flags,
@@ -1143,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
unsigned long xmaddr;
flags |= MAP_FIXED | MAP_ANONYMOUS;
- down_write(&mm->mmap_sem);
- xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+ xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
prot, flags, 0);
- up_write(&mm->mmap_sem);
kdebug("mmap[%d] <anon>"
" ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 024d20ee3ca3..6b2daf99fab8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -542,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm,
*/
DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
+ textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_EXECUTABLE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
@@ -556,10 +554,8 @@ static int load_flat_file(struct linux_binprm * bprm,
len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- realdatastart = do_mmap(0, 0, len,
+ realdatastart = vm_mmap(0, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
if (!realdatastart)
@@ -603,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm,
len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
- down_write(&current->mm->mmap_sem);
- textpos = do_mmap(0, 0, len,
+ textpos = vm_mmap(0, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (!textpos || IS_ERR_VALUE(textpos)) {
if (!textpos)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index e4fc746629a7..4517aaff61b4 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -147,10 +147,8 @@ static int map_som_binary(struct file *file,
code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
current->mm->start_code = code_start;
current->mm->end_code = code_start + code_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, code_start, code_size, prot,
+ retval = vm_mmap(file, code_start, code_size, prot,
flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
@@ -158,20 +156,16 @@ static int map_som_binary(struct file *file,
data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
current->mm->start_data = data_start;
current->mm->end_data = bss_start = data_start + data_size;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(file, data_start, data_size,
+ retval = vm_mmap(file, data_start, data_size,
prot | PROT_WRITE, flags,
SOM_PAGESTART(hpuxhdr->exec_dfile));
- up_write(&current->mm->mmap_sem);
if (retval < 0 && retval > -1024)
goto out;
som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
current->mm->start_brk = current->mm->brk = som_brk;
- down_write(&current->mm->mmap_sem);
- retval = do_mmap(NULL, bss_start, som_brk - bss_start,
+ retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
- up_write(&current->mm->mmap_sem);
if (retval > 0 || retval < -1024)
retval = 0;
out:
diff --git a/fs/bio.c b/fs/bio.c
index e453924036e9..84da88539046 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -505,9 +505,14 @@ EXPORT_SYMBOL(bio_clone);
int bio_get_nr_vecs(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- return min_t(unsigned,
+ int nr_pages;
+
+ nr_pages = min_t(unsigned,
queue_max_segments(q),
queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
+
}
EXPORT_SYMBOL(bio_get_nr_vecs);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e08f6a20a5bb..ba11c30f302d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -70,7 +70,7 @@ static void bdev_inode_switch_bdi(struct inode *inode,
spin_unlock(&dst->wb.list_lock);
}
-static sector_t max_block(struct block_device *bdev)
+sector_t blkdev_max_block(struct block_device *bdev)
{
sector_t retval = ~((sector_t)0);
loff_t sz = i_size_read(bdev->bd_inode);
@@ -163,7 +163,7 @@ static int
blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= max_block(I_BDEV(inode))) {
+ if (iblock >= blkdev_max_block(I_BDEV(inode))) {
if (create)
return -EIO;
@@ -185,7 +185,7 @@ static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- sector_t end_block = max_block(I_BDEV(inode));
+ sector_t end_block = blkdev_max_block(I_BDEV(inode));
unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
if ((iblock + max_blocks) > end_block) {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f4e90748940a..bcec06750232 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -22,6 +22,7 @@
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
+#include "locking.h"
/*
* this structure records all encountered refs on the way up to the root
@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = size - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
+ int leave_spinning = path->leave_spinning;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
+ path->leave_spinning = 1;
while (1) {
len = btrfs_inode_ref_name_len(eb, iref);
bytes_left -= len;
if (bytes_left >= 0)
read_extent_buffer(eb, dest + bytes_left,
(unsigned long)(iref + 1), len);
- if (eb != eb_in)
+ if (eb != eb_in) {
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
+ }
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
if (ret > 0)
ret = -ENOENT;
@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
slot = path->slots[0];
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
- if (eb != eb_in)
+ if (eb != eb_in) {
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ }
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
+ path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);
@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx)
{
- int ret;
+ int ret = 0;
int slot;
u32 cur;
u32 len;
@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_inode_ref *iref;
struct btrfs_key found_key;
- while (1) {
+ while (!ret) {
+ path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key);
if (ret < 0)
@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
atomic_inc(&eb->refs);
+ btrfs_tree_read_lock(eb);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
item = btrfs_item_nr(eb, slot);
@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
(unsigned long long)found_key.objectid,
(unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx);
- if (ret) {
- free_extent_buffer(eb);
+ if (ret)
break;
- }
len = sizeof(*iref) + name_len;
iref = (struct btrfs_inode_ref *)((char *)iref + len);
}
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
void free_ipath(struct inode_fs_paths *ipath)
{
+ if (!ipath)
+ return;
kfree(ipath->fspath);
kfree(ipath);
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e801f226d7e0..4106264fbc65 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -220,10 +220,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
*/
static void add_root_to_dirty_list(struct btrfs_root *root)
{
+ spin_lock(&root->fs_info->trans_lock);
if (root->track_dirty && list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
+ spin_unlock(&root->fs_info->trans_lock);
}
/*
@@ -723,7 +725,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
cur = btrfs_find_tree_block(root, blocknr, blocksize);
if (cur)
- uptodate = btrfs_buffer_uptodate(cur, gen);
+ uptodate = btrfs_buffer_uptodate(cur, gen, 0);
else
uptodate = 0;
if (!cur || !uptodate) {
@@ -1358,7 +1360,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
eb = btrfs_find_tree_block(root, block1, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ /*
+ * if we get -eagain from btrfs_buffer_uptodate, we
+ * don't want to return eagain here. That will loop
+ * forever
+ */
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block1 = 0;
free_extent_buffer(eb);
}
@@ -1366,7 +1373,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = btrfs_find_tree_block(root, block2, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen))
+ if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block2 = 0;
free_extent_buffer(eb);
}
@@ -1504,8 +1511,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp) {
- if (btrfs_buffer_uptodate(tmp, 0)) {
- if (btrfs_buffer_uptodate(tmp, gen)) {
+ /* first we do an atomic uptodate check */
+ if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
+ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
/*
* we found an up to date block without
* sleeping, return
@@ -1523,8 +1531,9 @@ read_block_for_search(struct btrfs_trans_handle *trans,
free_extent_buffer(tmp);
btrfs_set_path_blocking(p);
+ /* now we're allowed to do a blocking uptodate check */
tmp = read_tree_block(root, blocknr, blocksize, gen);
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
*eb_ret = tmp;
return 0;
}
@@ -1559,7 +1568,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
- if (!btrfs_buffer_uptodate(tmp, 0))
+ if (!btrfs_buffer_uptodate(tmp, 0, 0))
ret = -EIO;
free_extent_buffer(tmp);
}
@@ -4043,7 +4052,7 @@ again:
tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
- if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+ if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
free_extent_buffer(tmp);
break;
}
@@ -4166,7 +4175,8 @@ next:
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
- if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
+ if (!cur ||
+ btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
slot++;
if (cur)
free_extent_buffer(cur);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3f65a812e282..8fd72331d600 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1078,7 +1078,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
- unsigned long mount_opt:21;
+ unsigned long mount_opt;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 20196f411206..a7ffc88a7dbe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -323,7 +323,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
* in the wrong place.
*/
static int verify_parent_transid(struct extent_io_tree *io_tree,
- struct extent_buffer *eb, u64 parent_transid)
+ struct extent_buffer *eb, u64 parent_transid,
+ int atomic)
{
struct extent_state *cached_state = NULL;
int ret;
@@ -331,6 +332,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
return 0;
+ if (atomic)
+ return -EAGAIN;
+
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
0, &cached_state);
if (extent_buffer_uptodate(eb) &&
@@ -372,7 +376,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
ret = read_extent_buffer_pages(io_tree, eb, start,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
- if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
+ if (!ret && !verify_parent_transid(io_tree, eb,
+ parent_transid, 0))
break;
/*
@@ -383,17 +388,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
break;
- if (!failed_mirror) {
- failed = 1;
- printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
- failed_mirror = eb->failed_mirror;
- }
-
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
if (num_copies == 1)
break;
+ if (!failed_mirror) {
+ failed = 1;
+ failed_mirror = eb->read_mirror;
+ }
+
mirror_num++;
if (mirror_num == failed_mirror)
mirror_num++;
@@ -564,7 +568,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
}
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state)
+ struct extent_state *state, int mirror)
{
struct extent_io_tree *tree;
u64 found_start;
@@ -589,6 +593,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!reads_done)
goto err;
+ eb->read_mirror = mirror;
if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
ret = -EIO;
goto err;
@@ -652,7 +657,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- eb->failed_mirror = failed_mirror;
+ eb->read_mirror = failed_mirror;
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
@@ -1202,7 +1207,7 @@ static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
root->commit_root = NULL;
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
- if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+ if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
@@ -2254,9 +2259,9 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer;
}
- if (sectorsize < PAGE_SIZE) {
- printk(KERN_WARNING "btrfs: Incompatible sector size "
- "found on %s\n", sb->s_id);
+ if (sectorsize != PAGE_SIZE) {
+ printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+ "found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer;
}
@@ -3143,7 +3148,8 @@ int close_ctree(struct btrfs_root *root)
return 0;
}
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
+ int atomic)
{
int ret;
struct inode *btree_inode = buf->pages[0]->mapping->host;
@@ -3153,7 +3159,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
return ret;
ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
- parent_transid);
+ parent_transid, atomic);
+ if (ret == -EAGAIN)
+ return ret;
return !ret;
}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a7ace1a2dd12..ab1830aaf0ed 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -66,7 +66,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
+ int atomic);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2b35f8d14bb9..49fd7b66d57b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) {
printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+ spin_lock(&delayed_refs->lock);
return ret;
}
@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) {
printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+ spin_lock(&delayed_refs->lock);
return ret;
}
@@ -3769,13 +3771,10 @@ again:
*/
if (current->journal_info)
return -EAGAIN;
- ret = wait_event_interruptible(space_info->wait,
- !space_info->flush);
- /* Must have been interrupted, return */
- if (ret) {
- printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
+ ret = wait_event_killable(space_info->wait, !space_info->flush);
+ /* Must have been killed, return */
+ if (ret)
return -EINTR;
- }
spin_lock(&space_info->lock);
}
@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = calc_global_metadata_size(fs_info);
- spin_lock(&block_rsv->lock);
spin_lock(&sinfo->lock);
+ spin_lock(&block_rsv->lock);
block_rsv->size = num_bytes;
@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->full = 1;
}
- spin_unlock(&sinfo->lock);
spin_unlock(&block_rsv->lock);
+ spin_unlock(&sinfo->lock);
}
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
@@ -6569,7 +6568,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
goto skip;
}
- if (!btrfs_buffer_uptodate(next, generation)) {
+ if (!btrfs_buffer_uptodate(next, generation, 0)) {
btrfs_tree_unlock(next);
free_extent_buffer(next);
next = NULL;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cd4b5e400221..c9018a05036e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
return 0;
}
+static struct extent_state *next_state(struct extent_state *state)
+{
+ struct rb_node *next = rb_next(&state->rb_node);
+ if (next)
+ return rb_entry(next, struct extent_state, rb_node);
+ else
+ return NULL;
+}
+
/*
* utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
+ * it will optionally wake up any one waiting on this state (wake == 1)
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
*/
-static int clear_state_bit(struct extent_io_tree *tree,
- struct extent_state *state,
- int *bits, int wake)
+static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
+ struct extent_state *state,
+ int *bits, int wake)
{
+ struct extent_state *next;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
- int ret = state->state & bits_to_clear;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
if (wake)
wake_up(&state->wq);
if (state->state == 0) {
+ next = next_state(state);
if (state->tree) {
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
}
} else {
merge_state(tree, state);
+ next = next_state(state);
}
- return ret;
+ return next;
}
static struct extent_state *
@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *cached;
struct extent_state *prealloc = NULL;
- struct rb_node *next_node;
struct rb_node *node;
u64 last_end;
int err;
@@ -528,14 +537,11 @@ hit_next:
WARN_ON(state->end < start);
last_end = state->end;
- if (state->end < end && !need_resched())
- next_node = rb_next(&state->rb_node);
- else
- next_node = NULL;
-
/* the state doesn't have the wanted bits, go ahead */
- if (!(state->state & bits))
+ if (!(state->state & bits)) {
+ state = next_state(state);
goto next;
+ }
/*
* | ---- desired range ---- |
@@ -593,16 +599,13 @@ hit_next:
goto out;
}
- clear_state_bit(tree, state, &bits, wake);
+ state = clear_state_bit(tree, state, &bits, wake);
next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- if (start <= end && next_node) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
+ if (start <= end && state && !need_resched())
goto hit_next;
- }
goto search_again;
out:
@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 start;
u64 end;
int whole_page;
- int failed_mirror;
+ int mirror;
int ret;
if (err)
@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
+ mirror = (int)(unsigned long)bio->bi_bdev;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
- state);
+ state, mirror);
if (ret)
uptodate = 0;
else
clean_io_failure(start, page);
}
- if (!uptodate)
- failed_mirror = (int)(unsigned long)bio->bi_bdev;
-
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
- ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
+ ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1;
@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we
* remain responsible for that page.
*/
- ret = bio_readpage_error(bio, page, start, end,
- failed_mirror, NULL);
+ ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
if (ret == 0) {
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -4120,6 +4120,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
if (atomic_inc_not_zero(&exists->refs)) {
spin_unlock(&mapping->private_lock);
unlock_page(p);
+ page_cache_release(p);
mark_extent_buffer_accessed(exists);
goto free_eb;
}
@@ -4199,8 +4200,7 @@ free_eb:
unlock_page(eb->pages[i]);
}
- if (!atomic_dec_and_test(&eb->refs))
- return exists;
+ WARN_ON(!atomic_dec_and_test(&eb->refs));
btrfs_release_extent_buffer(eb);
return exists;
}
@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- eb->failed_mirror = 0;
+ eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index faf10eb57f75..b516c3b8dec6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,7 +79,7 @@ struct extent_io_ops {
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state);
+ struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -135,7 +135,7 @@ struct extent_buffer {
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
- int failed_mirror;
+ int read_mirror;
struct list_head leak_list;
struct rcu_head rcu_head;
pid_t lock_owner;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d83260d7498f..53bf2d764bbc 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int extent_type;
int recow;
int ret;
+ int modify_tree = -1;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
if (!path)
return -ENOMEM;
+ if (start >= BTRFS_I(inode)->disk_i_size)
+ modify_tree = 0;
+
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
- search_start, -1);
+ search_start, modify_tree);
if (ret < 0)
break;
if (ret > 0 && path->slots[0] > 0 && search_start == start) {
@@ -634,7 +638,8 @@ next_slot:
}
search_start = max(key.offset, start);
- if (recow) {
+ if (recow || !modify_tree) {
+ modify_tree = -1;
btrfs_release_path(path);
continue;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 115bc05e42b0..61b16c641ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
* extent_io.c will try to find good copies for us.
*/
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state)
+ struct extent_state *state, int mirror)
{
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host;
@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
BTRFS_I(inode)->dummy_inode = 1;
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
- inode->i_op = &simple_dir_inode_operations;
+ inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
+ struct inode *inode = dentry->d_inode;
- if (!dentry->d_inode && !IS_ROOT(dentry))
- dentry = dentry->d_parent;
+ if (!inode && !IS_ROOT(dentry))
+ inode = dentry->d_parent->d_inode;
- if (dentry->d_inode) {
- root = BTRFS_I(dentry->d_inode)->root;
+ if (inode) {
+ root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0)
return 1;
+
+ if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ return 1;
}
return 0;
}
@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path;
struct list_head ins_list;
struct list_head del_list;
- struct qstr q;
int ret;
struct extent_buffer *leaf;
int slot;
@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
- struct dentry *tmp;
if (verify_dir_item(root, leaf, di))
break;
@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
- q.name = name_ptr;
- q.len = name_len;
- q.hash = full_name_hash(q.name, q.len);
- tmp = d_lookup(filp->f_dentry, &q);
- if (!tmp) {
- struct btrfs_key *newkey;
-
- newkey = kzalloc(sizeof(struct btrfs_key),
- GFP_NOFS);
- if (!newkey)
- goto no_dentry;
- tmp = d_alloc(filp->f_dentry, &q);
- if (!tmp) {
- kfree(newkey);
- dput(tmp);
- goto no_dentry;
- }
- memcpy(newkey, &location,
- sizeof(struct btrfs_key));
- tmp->d_fsdata = newkey;
- tmp->d_flags |= DCACHE_NEED_LOOKUP;
- d_rehash(tmp);
- dput(tmp);
- } else {
- dput(tmp);
- }
-no_dentry:
+
/* is this a reference to our own snapshot? If so
- * skip it
+ * skip it.
+ *
+ * In contrast to old kernels, we insert the snapshot's
+ * dir item and dir index after it has been created, so
+ * we won't find a reference to our own snapshot. We
+ * still keep the following code for backward
+ * compatibility.
*/
if (location.type == BTRFS_ROOT_ITEM_KEY &&
location.objectid == root->root_key.objectid) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 18cc23d164a8..14f8e1faa46e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
- strncpy(di_args->path, dev->name, sizeof(di_args->path));
+ if (dev->name)
+ strncpy(di_args->path, dev->name, sizeof(di_args->path));
+ else
+ di_args->path[0] = '\0';
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 4f69028a68c4..086e6bdae1c4 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -252,7 +252,7 @@ struct btrfs_data_container {
struct btrfs_ioctl_ino_path_args {
__u64 inum; /* in */
- __u32 size; /* in */
+ __u64 size; /* in */
__u64 reserved[4];
/* struct btrfs_data_container *fspath; out */
__u64 fspath; /* out */
@@ -260,7 +260,7 @@ struct btrfs_ioctl_ino_path_args {
struct btrfs_ioctl_logical_ino_args {
__u64 logical; /* in */
- __u32 size; /* in */
+ __u64 size; /* in */
__u64 reserved[4];
/* struct btrfs_data_container *inodes; out */
__u64 inodes;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index dc5d33146fdb..ac5d01085884 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio)
{
int ret;
- int looped = 0;
struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL;
u64 start;
u64 end;
int i;
-again:
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
@@ -274,9 +272,6 @@ again:
spin_unlock(&fs_info->reada_lock);
}
- if (looped)
- return NULL;
-
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
@@ -307,13 +302,15 @@ again:
ret = radix_tree_insert(&dev->reada_zones,
(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
- spin_unlock(&fs_info->reada_lock);
- if (ret) {
+ if (ret == -EEXIST) {
kfree(zone);
- looped = 1;
- goto again;
+ ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
+ logical >> PAGE_CACHE_SHIFT, 1);
+ if (ret == 1)
+ kref_get(&zone->refcnt);
}
+ spin_unlock(&fs_info->reada_lock);
return zone;
}
@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_key *top, int level)
{
int ret;
- int looped = 0;
struct reada_extent *re = NULL;
+ struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev;
+ struct btrfs_device *prev_dev;
u32 blocksize;
u64 length;
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
-again:
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re)
kref_get(&re->refcnt);
spin_unlock(&fs_info->reada_lock);
- if (re || looped)
+ if (re)
return re;
re = kzalloc(sizeof(*re), GFP_NOFS);
@@ -398,16 +395,31 @@ again:
/* insert extent in reada_tree + all per-device trees, all or nothing */
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
+ if (ret == -EEXIST) {
+ re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
+ BUG_ON(!re_exist);
+ kref_get(&re_exist->refcnt);
+ spin_unlock(&fs_info->reada_lock);
+ goto error;
+ }
if (ret) {
spin_unlock(&fs_info->reada_lock);
- if (ret != -ENOMEM) {
- /* someone inserted the extent in the meantime */
- looped = 1;
- }
goto error;
}
+ prev_dev = NULL;
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
+ if (dev == prev_dev) {
+ /*
+ * in case of DUP, just add the first zone. As both
+ * are on the same device, there's nothing to gain
+ * from adding both.
+ * Also, it wouldn't work, as the tree is per device
+ * and adding would fail with EEXIST
+ */
+ continue;
+ }
+ prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
while (--i >= 0) {
@@ -450,9 +462,7 @@ error:
}
kfree(bbio);
kfree(re);
- if (looped)
- goto again;
- return NULL;
+ return re_exist;
}
static void reada_kref_dummy(struct kref *kr)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 017281dbb2a7..646ee21bb035 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
if (rb_node)
backref_tree_panic(rb_node, -EEXIST, node->bytenr);
} else {
+ spin_lock(&root->fs_info->trans_lock);
list_del_init(&root->root_list);
+ spin_unlock(&root->fs_info->trans_lock);
kfree(node);
}
return 0;
@@ -3811,7 +3813,7 @@ restart:
ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
if (ret < 0) {
- if (ret != -EAGAIN) {
+ if (ret != -ENOSPC) {
err = ret;
WARN_ON(1);
break;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bc015f77f3ea..2f3d6f917fb3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -998,6 +998,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev,
page = sblock->pagev + page_index;
page->logical = logical;
page->physical = bbio->stripes[mirror_index].physical;
+ /* for missing devices, bdev is NULL */
page->bdev = bbio->stripes[mirror_index].dev->bdev;
page->mirror_num = mirror_index + 1;
page->page = alloc_page(GFP_NOFS);
@@ -1042,6 +1043,12 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_page *page = sblock->pagev + page_num;
DECLARE_COMPLETION_ONSTACK(complete);
+ if (page->bdev == NULL) {
+ page->io_error = 1;
+ sblock->no_io_error_seen = 0;
+ continue;
+ }
+
BUG_ON(!page->page);
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
@@ -1257,12 +1264,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
if (memcmp(csum, on_disk_csum, sdev->csum_size))
fail = 1;
- if (fail) {
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.csum_errors;
- spin_unlock(&sdev->stat_lock);
- }
-
return fail;
}
@@ -1335,15 +1336,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
++crc_fail;
- if (crc_fail || fail) {
- spin_lock(&sdev->stat_lock);
- if (crc_fail)
- ++sdev->stat.csum_errors;
- if (fail)
- ++sdev->stat.verify_errors;
- spin_unlock(&sdev->stat_lock);
- }
-
return fail || crc_fail;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8d5d380f7bdb..c5f8fca4195f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 0);
@@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
} else {
- if (fs_info->fs_devices->rw_devices == 0)
+ if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES;
goto restore;
+ }
- if (btrfs_super_log_root(fs_info->super_copy) != 0)
+ if (btrfs_super_log_root(fs_info->super_copy) != 0) {
ret = -EINVAL;
goto restore;
+ }
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 11b77a59db62..36422254ef67 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -73,8 +73,10 @@ loop:
cur_trans = root->fs_info->running_transaction;
if (cur_trans) {
- if (cur_trans->aborted)
+ if (cur_trans->aborted) {
+ spin_unlock(&root->fs_info->trans_lock);
return cur_trans->aborted;
+ }
atomic_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++;
@@ -1400,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = commit_fs_roots(trans, root);
if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction;
}
@@ -1411,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = commit_cowonly_roots(trans, root);
if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex);
+ mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d017283ae6f5..eb1ae908582c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -279,7 +279,7 @@ static int process_one_buffer(struct btrfs_root *log,
log->fs_info->extent_root,
eb->start, eb->len);
- if (btrfs_buffer_uptodate(eb, gen)) {
+ if (btrfs_buffer_uptodate(eb, gen, 0)) {
if (wc->write)
btrfs_write_tree_block(eb);
if (wc->wait)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 759d02486d7c..1411b99555a4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail;
num_stripes = ndevs * dev_stripes;
- if (stripe_size * num_stripes > max_chunk_size * ncopies) {
+ if (stripe_size * ndevs > max_chunk_size * ncopies) {
stripe_size = max_chunk_size * ncopies;
- do_div(stripe_size, num_stripes);
+ do_div(stripe_size, ndevs);
}
do_div(stripe_size, dev_stripes);
+
+ /* align to BTRFS_STRIPE_LEN */
do_div(stripe_size, BTRFS_STRIPE_LEN);
stripe_size *= BTRFS_STRIPE_LEN;
@@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
else if (mirror_num)
stripe_index += mirror_num - 1;
else {
+ int old_stripe_index = stripe_index;
stripe_index = find_live_mirror(map, stripe_index,
map->sub_stripes, stripe_index +
current->pid % map->sub_stripes);
- mirror_num = stripe_index + 1;
+ mirror_num = stripe_index - old_stripe_index + 1;
}
} else {
/*
@@ -4350,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
ret = __btrfs_open_devices(fs_devices, FMODE_READ,
root->fs_info->bdev_holder);
- if (ret)
+ if (ret) {
+ free_fs_devices(fs_devices);
goto out;
+ }
if (!fs_devices->seeding) {
__btrfs_close_devices(fs_devices);
diff --git a/fs/buffer.c b/fs/buffer.c
index 36d66653b931..ad5938ca357c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -921,6 +921,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
int uptodate = PageUptodate(page);
+ sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
do {
if (!buffer_mapped(bh)) {
@@ -929,7 +930,8 @@ init_page_buffers(struct page *page, struct block_device *bdev,
bh->b_blocknr = block;
if (uptodate)
set_buffer_uptodate(bh);
- set_buffer_mapped(bh);
+ if (block < end_block)
+ set_buffer_mapped(bh);
}
block++;
bh = bh->b_this_page;
@@ -985,7 +987,6 @@ grow_dev_page(struct block_device *bdev, sector_t block,
return page;
failed:
- BUG();
unlock_page(page);
page_cache_release(page);
return NULL;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d34212822444..541ef81f6ae8 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -370,13 +370,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
(int)(srcaddr->sa_family));
}
- seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+ seq_printf(s, ",uid=%u", cifs_sb->mnt_uid);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
seq_printf(s, ",forceuid");
else
seq_printf(s, ",noforceuid");
- seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
+ seq_printf(s, ",gid=%u", cifs_sb->mnt_gid);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
seq_printf(s, ",forcegid");
else
@@ -434,11 +434,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",noperm");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
seq_printf(s, ",strictcache");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
+ seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
+ seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid);
- seq_printf(s, ",rsize=%d", cifs_sb->rsize);
- seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+ seq_printf(s, ",rsize=%u", cifs_sb->rsize);
+ seq_printf(s, ",wsize=%u", cifs_sb->wsize);
/* convert actimeo and display it in seconds */
- seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
+ seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
return 0;
}
@@ -695,7 +699,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
* origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
* the cached file length
*/
- if (origin != SEEK_SET || origin != SEEK_CUR) {
+ if (origin != SEEK_SET && origin != SEEK_CUR) {
int rc;
struct inode *inode = file->f_path.dentry->d_inode;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d1389bb33ceb..65365358c976 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "1.77"
+#define CIFS_VERSION "1.78"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f52c5ab78f9d..da2f5446fa7a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4844,8 +4844,12 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
max_len = data_end - temp;
node->node_name = cifs_strndup_from_utf16(temp, max_len,
is_unicode, nls_codepage);
- if (!node->node_name)
+ if (!node->node_name) {
rc = -ENOMEM;
+ goto parse_DFS_referrals_exit;
+ }
+
+ ref++;
}
parse_DFS_referrals_exit:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f31dc9ac37b7..e0b56d7a19c5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -164,7 +164,8 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_sign, "sign" },
{ Opt_seal, "seal" },
{ Opt_direct, "direct" },
- { Opt_direct, "forceddirectio" },
+ { Opt_direct, "directio" },
+ { Opt_direct, "forcedirectio" },
{ Opt_strictcache, "strictcache" },
{ Opt_noac, "noac" },
{ Opt_fsc, "fsc" },
@@ -215,6 +216,8 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_ignore, "cred" },
{ Opt_ignore, "credentials" },
+ { Opt_ignore, "cred=%s" },
+ { Opt_ignore, "credentials=%s" },
{ Opt_ignore, "guest" },
{ Opt_ignore, "rw" },
{ Opt_ignore, "ro" },
@@ -2183,6 +2186,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->session_estab = false;
tcp_ses->sequence_number = 0;
tcp_ses->lstrp = jiffies;
+ spin_lock_init(&tcp_ses->req_lock);
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
@@ -3228,10 +3232,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_uid = pvolume_info->linux_uid;
cifs_sb->mnt_gid = pvolume_info->linux_gid;
- if (pvolume_info->backupuid_specified)
- cifs_sb->mnt_backupuid = pvolume_info->backupuid;
- if (pvolume_info->backupgid_specified)
- cifs_sb->mnt_backupgid = pvolume_info->backupgid;
cifs_sb->mnt_file_mode = pvolume_info->file_mode;
cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
cFYI(1, "file mode: 0x%hx dir mode: 0x%hx",
@@ -3262,10 +3262,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
if (pvolume_info->cifs_acl)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
- if (pvolume_info->backupuid_specified)
+ if (pvolume_info->backupuid_specified) {
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
- if (pvolume_info->backupgid_specified)
+ cifs_sb->mnt_backupuid = pvolume_info->backupuid;
+ }
+ if (pvolume_info->backupgid_specified) {
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
+ cifs_sb->mnt_backupgid = pvolume_info->backupgid;
+ }
if (pvolume_info->override_uid)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
if (pvolume_info->override_gid)
@@ -3614,22 +3618,6 @@ cifs_get_volume_info(char *mount_data, const char *devname)
return volume_info;
}
-/* make sure ra_pages is a multiple of rsize */
-static inline unsigned int
-cifs_ra_pages(struct cifs_sb_info *cifs_sb)
-{
- unsigned int reads;
- unsigned int rsize_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
-
- if (rsize_pages >= default_backing_dev_info.ra_pages)
- return default_backing_dev_info.ra_pages;
- else if (rsize_pages == 0)
- return rsize_pages;
-
- reads = default_backing_dev_info.ra_pages / rsize_pages;
- return reads * rsize_pages;
-}
-
int
cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
{
@@ -3717,7 +3705,7 @@ try_mount_again:
cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info);
/* tune readahead according to rsize */
- cifs_sb->bdi.ra_pages = cifs_ra_pages(cifs_sb);
+ cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
remote_path_check:
#ifdef CONFIG_CIFS_DFS_UPCALL
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d172c8ed9017..ec4e9a2a12f8 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -668,12 +668,19 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
return 0;
else {
/*
- * Forcibly invalidate automounting directory inodes
- * (remote DFS directories) so to have them
- * instantiated again for automount
+ * If the inode wasn't known to be a dfs entry when
+ * the dentry was instantiated, such as when created
+ * via ->readdir(), it needs to be set now since the
+ * attributes will have been updated by
+ * cifs_revalidate_dentry().
*/
- if (IS_AUTOMOUNT(direntry->d_inode))
- return 0;
+ if (IS_AUTOMOUNT(direntry->d_inode) &&
+ !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
+ spin_lock(&direntry->d_lock);
+ direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
+ spin_unlock(&direntry->d_lock);
+ }
+
return 1;
}
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index fae765dac934..81725e9286e9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2178,7 +2178,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_pages, i;
size_t copied, len, cur_len;
ssize_t total_written = 0;
- loff_t offset = *poffset;
+ loff_t offset;
struct iov_iter it;
struct cifsFileInfo *open_file;
struct cifs_tcon *tcon;
@@ -2200,6 +2200,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
open_file = file->private_data;
tcon = tlink_tcon(open_file->tlink);
+ offset = *poffset;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..8c1ab8fb5012 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
* Compare 2 name strings, return 0 if they match, otherwise non-zero.
* The strings are both count bytes long, and count is non-zero.
*/
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
- const unsigned char *ct, size_t tcount)
-{
#ifdef CONFIG_DCACHE_WORD_ACCESS
- unsigned long a,b,mask;
- if (unlikely(scount != tcount))
- return 1;
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has a
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
+ unsigned long a,b,mask;
for (;;) {
a = *(unsigned long *)cs;
- b = *(unsigned long *)ct;
+ b = load_unaligned_zeropad(ct);
if (tcount < sizeof(unsigned long))
break;
if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
}
mask = ~(~0ul << tcount*8);
return unlikely(!!((a ^ b) & mask));
+}
+
#else
- if (scount != tcount)
- return 1;
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
do {
if (*cs != *ct)
return 1;
@@ -177,7 +186,32 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
tcount--;
} while (tcount);
return 0;
+}
+
#endif
+
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+ const unsigned char *cs;
+ /*
+ * Be careful about RCU walk racing with rename:
+ * use ACCESS_ONCE to fetch the name pointer.
+ *
+ * NOTE! Even if a rename will mean that the length
+ * was not loaded atomically, we don't care. The
+ * RCU walk will check the sequence count eventually,
+ * and catch it. And we won't overrun the buffer,
+ * because we're reading the name pointer atomically,
+ * and a dentry name is guaranteed to be properly
+ * terminated with a NUL byte.
+ *
+ * End result: even if 'len' is wrong, we'll exit
+ * early because the data cannot match (there can
+ * be no NUL in the ct/tcount data)
+ */
+ cs = ACCESS_ONCE(dentry->d_name.name);
+ smp_read_barrier_depends();
+ return dentry_string_cmp(cs, ct, tcount);
}
static void __d_free(struct rcu_head *head)
@@ -1240,6 +1274,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
if (!dentry)
return NULL;
+ /*
+ * We guarantee that the inline name is always NUL-terminated.
+ * This way the memcpy() done by the name switching in rename
+ * will still always have a NUL at the end, even if we might
+ * be overwriting an internal NUL character
+ */
+ dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
if (name->len > DNAME_INLINE_LEN-1) {
dname = kmalloc(name->len + 1, GFP_KERNEL);
if (!dname) {
@@ -1249,13 +1290,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
} else {
dname = dentry->d_iname;
}
- dentry->d_name.name = dname;
dentry->d_name.len = name->len;
dentry->d_name.hash = name->hash;
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
+ /* Make sure we always see the terminating NUL character */
+ smp_wmb();
+ dentry->d_name.name = dname;
+
dentry->d_count = 1;
dentry->d_flags = 0;
spin_lock_init(&dentry->d_lock);
@@ -1421,18 +1465,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
}
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
- struct qstr *qstr = &alias->d_name;
-
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
* parent changes, because the parent inode i_mutex is held.
*/
- if (qstr->hash != hash)
+ if (alias->d_name.hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
- if (dentry_cmp(qstr->name, qstr->len, name, len))
+ if (alias->d_name.len != len)
+ continue;
+ if (dentry_cmp(alias, name, len))
continue;
__dget(alias);
return alias;
@@ -1471,7 +1515,7 @@ struct dentry *d_make_root(struct inode *root_inode)
struct dentry *res = NULL;
if (root_inode) {
- static const struct qstr name = { .name = "/", .len = 1 };
+ static const struct qstr name = QSTR_INIT("/", 1);
res = __d_alloc(root_inode->i_sb, &name);
if (res)
@@ -1709,6 +1753,48 @@ err_out:
}
EXPORT_SYMBOL(d_add_ci);
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+ D_COMP_OK,
+ D_COMP_NOMATCH,
+ D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+ const struct dentry *parent,
+ struct inode *inode,
+ struct dentry *dentry,
+ unsigned int seq,
+ const struct qstr *name)
+{
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
+ struct inode *i = dentry->d_inode;
+
+ if (read_seqcount_retry(&dentry->d_seq, seq)) {
+ cpu_relax();
+ return D_COMP_SEQRETRY;
+ }
+ if (parent->d_op->d_compare(parent, inode,
+ dentry, i,
+ tlen, tname, name))
+ return D_COMP_NOMATCH;
+ return D_COMP_OK;
+}
+
/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
* @parent: parent dentry
@@ -1735,15 +1821,17 @@ EXPORT_SYMBOL(d_add_ci);
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
* is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
*/
struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
- unsigned *seqp, struct inode **inode)
+ unsigned *seqp, struct inode *inode)
{
- unsigned int len = name->len;
- unsigned int hash = name->hash;
+ u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
- struct hlist_bl_head *b = d_hash(parent, hash);
+ struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -1769,49 +1857,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
*/
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
unsigned seq;
- struct inode *i;
- const char *tname;
- int tlen;
-
- if (dentry->d_name.hash != hash)
- continue;
seqretry:
- seq = read_seqcount_begin(&dentry->d_seq);
+ /*
+ * The dentry sequence count protects us from concurrent
+ * renames, and thus protects inode, parent and name fields.
+ *
+ * The caller must perform a seqcount check in order
+ * to do anything useful with the returned dentry,
+ * including using the 'd_inode' pointer.
+ *
+ * NOTE! We do a "raw" seqcount_begin here. That means that
+ * we don't wait for the sequence count to stabilize if it
+ * is in the middle of a sequence change. If we do the slow
+ * dentry compare, we will do seqretries until it is stable,
+ * and if we end up with a successful lookup, we actually
+ * want to exit RCU lookup anyway.
+ */
+ seq = raw_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
if (d_unhashed(dentry))
continue;
- tlen = dentry->d_name.len;
- tname = dentry->d_name.name;
- i = dentry->d_inode;
- prefetch(tname);
- /*
- * This seqcount check is required to ensure name and
- * len are loaded atomically, so as not to walk off the
- * edge of memory when walking. If we could load this
- * atomically some other way, we could drop this check.
- */
- if (read_seqcount_retry(&dentry->d_seq, seq))
- goto seqretry;
+ *seqp = seq;
+
if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
- if (parent->d_op->d_compare(parent, *inode,
- dentry, i,
- tlen, tname, name))
+ if (dentry->d_name.hash != hashlen_hash(hashlen))
continue;
- } else {
- if (dentry_cmp(tname, tlen, str, len))
+ switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+ case D_COMP_OK:
+ return dentry;
+ case D_COMP_NOMATCH:
continue;
+ default:
+ goto seqretry;
+ }
}
- /*
- * No extra seqcount check is required after the name
- * compare. The caller must perform a seqcount check in
- * order to do anything useful with the returned dentry
- * anyway.
- */
- *seqp = seq;
- *inode = i;
- return dentry;
+
+ if (dentry->d_name.hash_len != hashlen)
+ continue;
+ if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+ return dentry;
}
return NULL;
}
@@ -1890,8 +1976,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
rcu_read_lock();
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
- const char *tname;
- int tlen;
if (dentry->d_name.hash != hash)
continue;
@@ -1906,15 +1990,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
- tlen = dentry->d_name.len;
- tname = dentry->d_name.name;
if (parent->d_flags & DCACHE_OP_COMPARE) {
+ int tlen = dentry->d_name.len;
+ const char *tname = dentry->d_name.name;
if (parent->d_op->d_compare(parent, parent->d_inode,
dentry, dentry->d_inode,
tlen, tname, name))
goto next;
} else {
- if (dentry_cmp(tname, tlen, str, len))
+ if (dentry->d_name.len != len)
+ goto next;
+ if (dentry_cmp(dentry, str, len))
goto next;
}
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index fa5c07d51dcc..4c58d4a3adc4 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1737,6 +1737,18 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
return 1;
/*
+ * Even if the convert is compat with all granted locks,
+ * QUECVT forces it behind other locks on the convert queue.
+ */
+
+ if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) {
+ if (list_empty(&r->res_convertqueue))
+ return 1;
+ else
+ goto out;
+ }
+
+ /*
* The NOORDER flag is set to avoid the standard vms rules on grant
* order.
*/
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 739b0985b398..c0b3c70ee87a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1663,8 +1663,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
if (op == EPOLL_CTL_ADD) {
if (is_file_epoll(tfile)) {
error = -ELOOP;
- if (ep_loop_check(ep, tfile) != 0)
+ if (ep_loop_check(ep, tfile) != 0) {
+ clear_tfile_check_list();
goto error_tgt_fput;
+ }
} else
list_add(&tfile->f_tfile_llink, &tfile_check_list);
}
diff --git a/fs/exec.c b/fs/exec.c
index b1fd2025e59a..d038968b54b4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1245,6 +1245,13 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
bprm->unsafe |= LSM_UNSAFE_PTRACE;
}
+ /*
+ * This isn't strictly necessary, but it makes it harder for LSMs to
+ * mess up.
+ */
+ if (current->no_new_privs)
+ bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+
n_fs = 1;
spin_lock(&p->fs->lock);
rcu_read_lock();
@@ -1288,7 +1295,8 @@ int prepare_binprm(struct linux_binprm *bprm)
bprm->cred->euid = current_euid();
bprm->cred->egid = current_egid();
- if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
+ if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
+ !current->no_new_privs) {
/* Set-uid? */
if (mode & S_ISUID) {
bprm->per_clear |= PER_CLEAR_ON_SETID;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index dffb86536285..f663a67d7bf0 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -79,7 +79,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext2_get_parent(struct dentry *child)
{
- struct qstr dotdot = {.name = "..", .len = 2};
+ struct qstr dotdot = QSTR_INIT("..", 2);
unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index d7940b24cf68..eeb63dfc5d20 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1045,7 +1045,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext3_get_parent(struct dentry *child)
{
unsigned long ino;
- struct qstr dotdot = {.name = "..", .len = 2};
+ struct qstr dotdot = QSTR_INIT("..", 2);
struct ext3_dir_entry_2 * de;
struct buffer_head *bh;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 349d7b3671c8..e2a3f4b0ff78 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1052,10 +1052,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
- static const struct qstr dotdot = {
- .name = "..",
- .len = 2,
- };
+ static const struct qstr dotdot = QSTR_INIT("..", 2);
struct ext4_dir_entry_2 * de;
struct buffer_head *bh;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6da193564e43..e1fb1d5de58e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1597,7 +1597,9 @@ static int parse_options(char *options, struct super_block *sb,
unsigned int *journal_ioprio,
int is_remount)
{
+#ifdef CONFIG_QUOTA
struct ext4_sb_info *sbi = EXT4_SB(sb);
+#endif
char *p;
substring_t args[MAX_OPT_ARGS];
int token;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 230eb0f005b6..bd4a5892c93c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -73,12 +73,8 @@ static int gfs2_set_mode(struct inode *inode, umode_t mode)
int error = 0;
if (mode != inode->i_mode) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_MODE;
- iattr.ia_mode = mode;
-
- error = gfs2_setattr_simple(inode, &iattr);
+ inode->i_mode = mode;
+ mark_inode_dirty(inode);
}
return error;
@@ -126,9 +122,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
return PTR_ERR(acl);
if (!acl) {
mode &= ~current_umask();
- if (mode != inode->i_mode)
- error = gfs2_set_mode(inode, mode);
- return error;
+ return gfs2_set_mode(inode, mode);
}
if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9b2ff0e851b1..e80a464850c8 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -36,8 +36,8 @@
#include "glops.h"
-void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
- unsigned int from, unsigned int to)
+static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+ unsigned int from, unsigned int to)
{
struct buffer_head *head = page_buffers(page);
unsigned int bsize = head->b_size;
@@ -517,15 +517,14 @@ out:
/**
* gfs2_internal_read - read an internal file
* @ip: The gfs2 inode
- * @ra_state: The readahead state (or NULL for no readahead)
* @buf: The buffer to fill
* @pos: The file position
* @size: The amount to read
*
*/
-int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
- char *buf, loff_t *pos, unsigned size)
+int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
+ unsigned size)
{
struct address_space *mapping = ip->i_inode.i_mapping;
unsigned long index = *pos / PAGE_CACHE_SIZE;
@@ -943,8 +942,8 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh)
clear_buffer_dirty(bh);
bd = bh->b_private;
if (bd) {
- if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh))
- list_del_init(&bd->bd_le.le_list);
+ if (!list_empty(&bd->bd_list) && !buffer_pinned(bh))
+ list_del_init(&bd->bd_list);
else
gfs2_remove_from_journal(bh, current->journal_info, 0);
}
@@ -1084,10 +1083,9 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bd = bh->b_private;
if (bd) {
gfs2_assert_warn(sdp, bd->bd_bh == bh);
- gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
- if (!list_empty(&bd->bd_le.le_list)) {
+ if (!list_empty(&bd->bd_list)) {
if (!buffer_pinned(bh))
- list_del_init(&bd->bd_le.le_list);
+ list_del_init(&bd->bd_list);
else
bd = NULL;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 03c04febe26f..dab54099dd98 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -324,7 +324,7 @@ static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
if (!dblock)
return x + 1;
- ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]);
+ ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, &mp->mp_bh[x+1]);
if (ret)
return ret;
}
@@ -882,7 +882,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
} else {
- error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
+ error = gfs2_meta_indirect_buffer(ip, height, block, &bh);
if (error)
return error;
@@ -1169,6 +1169,7 @@ static int do_grow(struct inode *inode, u64 size)
struct buffer_head *dibh;
struct gfs2_qadata *qa = NULL;
int error;
+ int unstuff = 0;
if (gfs2_is_stuffed(ip) &&
(size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
@@ -1183,13 +1184,14 @@ static int do_grow(struct inode *inode, u64 size)
error = gfs2_inplace_reserve(ip, 1);
if (error)
goto do_grow_qunlock;
+ unstuff = 1;
}
error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0);
if (error)
goto do_grow_release;
- if (qa) {
+ if (unstuff) {
error = gfs2_unstuff_dinode(ip, NULL);
if (error)
goto do_end_trans;
@@ -1208,7 +1210,7 @@ static int do_grow(struct inode *inode, u64 size)
do_end_trans:
gfs2_trans_end(sdp);
do_grow_release:
- if (qa) {
+ if (unstuff) {
gfs2_inplace_release(ip);
do_grow_qunlock:
gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a836056343f0..8aaeb07a07b5 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -821,7 +821,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
struct buffer_head *bh;
struct gfs2_leaf *leaf;
struct gfs2_dirent *dent;
- struct qstr name = { .name = "", .len = 0, .hash = 0 };
+ struct qstr name = { .name = "" };
error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
if (error)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a3d2c9ee8d66..31b199f6efc1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -558,14 +558,14 @@ fail:
}
/**
- * gfs2_close - called to close a struct file
+ * gfs2_release - called to close a struct file
* @inode: the inode the struct file belongs to
* @file: the struct file being closed
*
* Returns: errno
*/
-static int gfs2_close(struct inode *inode, struct file *file)
+static int gfs2_release(struct inode *inode, struct file *file)
{
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_file *fp;
@@ -1005,7 +1005,7 @@ const struct file_operations gfs2_file_fops = {
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
- .release = gfs2_close,
+ .release = gfs2_release,
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
@@ -1019,7 +1019,7 @@ const struct file_operations gfs2_dir_fops = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
- .release = gfs2_close,
+ .release = gfs2_release,
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
@@ -1037,7 +1037,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
- .release = gfs2_close,
+ .release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
@@ -1049,7 +1049,7 @@ const struct file_operations gfs2_dir_fops_nolock = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
- .release = gfs2_close,
+ .release = gfs2_release,
.fsync = gfs2_fsync,
.llseek = default_llseek,
};
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1656df7aacd2..4bdcf3784187 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,7 +94,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
/* A shortened, inline version of gfs2_trans_begin() */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = (unsigned long)__builtin_return_address(0);
- INIT_LIST_HEAD(&tr.tr_list_buf);
gfs2_log_reserve(sdp, tr.tr_reserved);
BUG_ON(current->journal_info);
current->journal_info = &tr;
@@ -379,11 +378,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
if (error)
return error;
- if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
- brelse(dibh);
- return -EIO;
- }
-
error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
clear_bit(GIF_INVALID, &ip->i_flags);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 47d0bda5ac2b..aa9949e5de26 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -26,7 +26,7 @@
#define DIO_METADATA 0x00000020
struct gfs2_log_operations;
-struct gfs2_log_element;
+struct gfs2_bufdata;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
@@ -52,7 +52,7 @@ struct gfs2_log_header_host {
*/
struct gfs2_log_operations {
- void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
+ void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void (*lo_before_commit) (struct gfs2_sbd *sdp);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
@@ -64,11 +64,6 @@ struct gfs2_log_operations {
const char *lo_name;
};
-struct gfs2_log_element {
- struct list_head le_list;
- const struct gfs2_log_operations *le_ops;
-};
-
#define GBF_FULL 1
struct gfs2_bitmap {
@@ -118,15 +113,10 @@ TAS_BUFFER_FNS(Zeronew, zeronew)
struct gfs2_bufdata {
struct buffer_head *bd_bh;
struct gfs2_glock *bd_gl;
+ u64 bd_blkno;
- union {
- struct list_head list_tr;
- u64 blkno;
- } u;
-#define bd_list_tr u.list_tr
-#define bd_blkno u.blkno
-
- struct gfs2_log_element bd_le;
+ struct list_head bd_list;
+ const struct gfs2_log_operations *bd_ops;
struct gfs2_ail *bd_ail;
struct list_head bd_ail_st_list;
@@ -411,13 +401,10 @@ struct gfs2_trans {
int tr_touched;
- unsigned int tr_num_buf;
unsigned int tr_num_buf_new;
unsigned int tr_num_databuf_new;
unsigned int tr_num_buf_rm;
unsigned int tr_num_databuf_rm;
- struct list_head tr_list_buf;
-
unsigned int tr_num_revoke;
unsigned int tr_num_revoke_rm;
};
@@ -699,7 +686,6 @@ struct gfs2_sbd {
struct list_head sd_log_le_buf;
struct list_head sd_log_le_revoke;
- struct list_head sd_log_le_rg;
struct list_head sd_log_le_databuf;
struct list_head sd_log_le_ordered;
@@ -716,7 +702,9 @@ struct gfs2_sbd {
struct rw_semaphore sd_log_flush_lock;
atomic_t sd_log_in_flight;
+ struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
+ int sd_log_error;
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 276e7b52b658..c53c7477f6da 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -17,10 +17,7 @@
extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
extern int gfs2_internal_read(struct gfs2_inode *ip,
- struct file_ra_state *ra_state,
char *buf, loff_t *pos, unsigned size);
-extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
- unsigned int from, unsigned int to);
extern void gfs2_set_aops(struct inode *inode);
static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index f8411bd1b805..5f5e70e047dc 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -200,10 +200,11 @@ static int make_mode(const unsigned int lmstate)
return -1;
}
-static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
+static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
const int req)
{
u32 lkf = DLM_LKF_VALBLK;
+ u32 lkid = gl->gl_lksb.sb_lkid;
if (gfs_flags & LM_FLAG_TRY)
lkf |= DLM_LKF_NOQUEUE;
@@ -227,8 +228,11 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
BUG();
}
- if (lkid != 0)
+ if (lkid != 0) {
lkf |= DLM_LKF_CONVERT;
+ if (test_bit(GLF_BLOCKING, &gl->gl_flags))
+ lkf |= DLM_LKF_QUECVT;
+ }
return lkf;
}
@@ -250,7 +254,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
char strname[GDLM_STRNAME_BYTES] = "";
req = make_mode(req_state);
- lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
+ lkf = make_flags(gl, flags, req);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
if (gl->gl_lksb.sb_lkid) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 4752eadc7f6e..f4beeb9c81c1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -32,8 +32,6 @@
#include "dir.h"
#include "trace_gfs2.h"
-#define PULL 1
-
/**
* gfs2_struct2blk - compute stuff
* @sdp: the filesystem
@@ -359,18 +357,6 @@ retry:
return 0;
}
-u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
-{
- struct gfs2_journal_extent *je;
-
- list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
- if (lbn >= je->lblock && lbn < je->lblock + je->blocks)
- return je->dblock + lbn - je->lblock;
- }
-
- return -1;
-}
-
/**
* log_distance - Compute distance between two journal blocks
* @sdp: The GFS2 superblock
@@ -466,17 +452,6 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
return tail;
}
-void gfs2_log_incr_head(struct gfs2_sbd *sdp)
-{
- BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
- (sdp->sd_log_flush_head != sdp->sd_log_head));
-
- if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
- sdp->sd_log_flush_head = 0;
- sdp->sd_log_flush_wrapped = 1;
- }
-}
-
static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
@@ -511,8 +486,8 @@ static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct gfs2_bufdata *bda, *bdb;
- bda = list_entry(a, struct gfs2_bufdata, bd_le.le_list);
- bdb = list_entry(b, struct gfs2_bufdata, bd_le.le_list);
+ bda = list_entry(a, struct gfs2_bufdata, bd_list);
+ bdb = list_entry(b, struct gfs2_bufdata, bd_list);
if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
return -1;
@@ -530,8 +505,8 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
gfs2_log_lock(sdp);
list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
- bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list);
- list_move(&bd->bd_le.le_list, &written);
+ bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list);
+ list_move(&bd->bd_list, &written);
bh = bd->bd_bh;
if (!buffer_dirty(bh))
continue;
@@ -558,7 +533,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
gfs2_log_lock(sdp);
while (!list_empty(&sdp->sd_log_le_ordered)) {
- bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list);
+ bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list);
bh = bd->bd_bh;
if (buffer_locked(bh)) {
get_bh(bh);
@@ -568,7 +543,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
gfs2_log_lock(sdp);
continue;
}
- list_del_init(&bd->bd_le.le_list);
+ list_del_init(&bd->bd_list);
}
gfs2_log_unlock(sdp);
}
@@ -580,25 +555,19 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
* Returns: the initialized log buffer descriptor
*/
-static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
+static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
- u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
- struct buffer_head *bh;
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
-
- bh = sb_getblk(sdp->sd_vfs, blkno);
- lock_buffer(bh);
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
+ int rw = WRITE_FLUSH_FUA | REQ_META;
+ struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ lh = page_address(page);
+ clear_page(lh);
gfs2_ail1_empty(sdp);
tail = current_tail(sdp);
- lh = (struct gfs2_log_header *)bh->b_data;
- memset(lh, 0, sizeof(struct gfs2_log_header));
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
lh->lh_header.__pad0 = cpu_to_be64(0);
@@ -608,31 +577,22 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
lh->lh_flags = cpu_to_be32(flags);
lh->lh_tail = cpu_to_be32(tail);
lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
- hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+ hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
- bh->b_end_io = end_buffer_write_sync;
- get_bh(bh);
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
log_flush_wait(sdp);
- submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
- } else {
- submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
+ rw = WRITE_SYNC | REQ_META | REQ_PRIO;
}
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- gfs2_io_error_bh(sdp, bh);
- brelse(bh);
+ sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
+ gfs2_log_write_page(sdp, page);
+ gfs2_log_flush_bio(sdp, rw);
+ log_flush_wait(sdp);
if (sdp->sd_log_tail != tail)
log_pull_tail(sdp, tail);
- else
- gfs2_assert_withdraw(sdp, !pull);
-
- sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
- gfs2_log_incr_head(sdp);
}
/**
@@ -678,15 +638,14 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
gfs2_ordered_write(sdp);
lops_before_commit(sdp);
+ gfs2_log_flush_bio(sdp, WRITE);
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
- log_write_header(sdp, 0, 0);
+ log_write_header(sdp, 0);
} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
- gfs2_log_lock(sdp);
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
- gfs2_log_unlock(sdp);
- log_write_header(sdp, 0, PULL);
+ log_write_header(sdp, 0);
}
lops_after_commit(sdp, ai);
@@ -735,21 +694,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_unlock(sdp);
}
-static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
-{
- struct list_head *head = &tr->tr_list_buf;
- struct gfs2_bufdata *bd;
-
- gfs2_log_lock(sdp);
- while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
- list_del_init(&bd->bd_list_tr);
- tr->tr_num_buf--;
- }
- gfs2_log_unlock(sdp);
- gfs2_assert_warn(sdp, !tr->tr_num_buf);
-}
-
/**
* gfs2_log_commit - Commit a transaction to the log
* @sdp: the filesystem
@@ -768,8 +712,6 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
log_refund(sdp, tr);
- buf_lo_incore_commit(sdp, tr);
-
up_read(&sdp->sd_log_flush_lock);
if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
@@ -798,8 +740,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
- log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
- (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
+ log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);
gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
@@ -854,11 +795,9 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
DEFINE_WAIT(wait);
- unsigned preflush;
while (!kthread_should_stop()) {
- preflush = atomic_read(&sdp->sd_log_pinned);
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
gfs2_ail1_empty(sdp);
gfs2_log_flush(sdp, NULL);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index ff07454b582c..3fd5215ea25f 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -52,8 +52,6 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
unsigned int ssize);
extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
-extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
-extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp, unsigned int lbn);
extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 6b1efb594d90..852c1be1dd3b 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -127,146 +127,277 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
atomic_dec(&sdp->sd_log_pinned);
}
-
-static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
+static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
- return (struct gfs2_log_descriptor *)bh->b_data;
+ BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
+ (sdp->sd_log_flush_head != sdp->sd_log_head));
+
+ if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
+ sdp->sd_log_flush_head = 0;
+ sdp->sd_log_flush_wrapped = 1;
+ }
}
-static inline __be64 *bh_log_ptr(struct buffer_head *bh)
+static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
{
- struct gfs2_log_descriptor *ld = bh_log_desc(bh);
- return (__force __be64 *)(ld + 1);
+ unsigned int lbn = sdp->sd_log_flush_head;
+ struct gfs2_journal_extent *je;
+ u64 block;
+
+ list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) {
+ if (lbn >= je->lblock && lbn < je->lblock + je->blocks) {
+ block = je->dblock + lbn - je->lblock;
+ gfs2_log_incr_head(sdp);
+ return block;
+ }
+ }
+
+ return -1;
}
-static inline __be64 *bh_ptr_end(struct buffer_head *bh)
+/**
+ * gfs2_end_log_write_bh - end log write of pagecache data with buffers
+ * @sdp: The superblock
+ * @bvec: The bio_vec
+ * @error: The i/o status
+ *
+ * This finds the relavent buffers and unlocks then and sets the
+ * error flag according to the status of the i/o request. This is
+ * used when the log is writing data which has an in-place version
+ * that is pinned in the pagecache.
+ */
+
+static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
+ int error)
{
- return (__force __be64 *)(bh->b_data + bh->b_size);
+ struct buffer_head *bh, *next;
+ struct page *page = bvec->bv_page;
+ unsigned size;
+
+ bh = page_buffers(page);
+ size = bvec->bv_len;
+ while (bh_offset(bh) < bvec->bv_offset)
+ bh = bh->b_this_page;
+ do {
+ if (error)
+ set_buffer_write_io_error(bh);
+ unlock_buffer(bh);
+ next = bh->b_this_page;
+ size -= bh->b_size;
+ brelse(bh);
+ bh = next;
+ } while(bh && size);
}
/**
- * gfs2_log_write_endio - End of I/O for a log buffer
- * @bh: The buffer head
- * @uptodate: I/O Status
+ * gfs2_end_log_write - end of i/o to the log
+ * @bio: The bio
+ * @error: Status of i/o request
+ *
+ * Each bio_vec contains either data from the pagecache or data
+ * relating to the log itself. Here we iterate over the bio_vec
+ * array, processing both kinds of data.
*
*/
-static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
+static void gfs2_end_log_write(struct bio *bio, int error)
{
- struct gfs2_sbd *sdp = bh->b_private;
- bh->b_private = NULL;
+ struct gfs2_sbd *sdp = bio->bi_private;
+ struct bio_vec *bvec;
+ struct page *page;
+ int i;
- end_buffer_write_sync(bh, uptodate);
+ if (error) {
+ sdp->sd_log_error = error;
+ fs_err(sdp, "Error %d writing to log\n", error);
+ }
+
+ bio_for_each_segment(bvec, bio, i) {
+ page = bvec->bv_page;
+ if (page_has_buffers(page))
+ gfs2_end_log_write_bh(sdp, bvec, error);
+ else
+ mempool_free(page, gfs2_page_pool);
+ }
+
+ bio_put(bio);
if (atomic_dec_and_test(&sdp->sd_log_in_flight))
wake_up(&sdp->sd_log_flush_wait);
}
/**
- * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
- * @sdp: The GFS2 superblock
+ * gfs2_log_flush_bio - Submit any pending log bio
+ * @sdp: The superblock
+ * @rw: The rw flags
*
- * tReturns: the buffer_head
+ * Submit any pending part-built or full bio to the block device. If
+ * there is no pending bio, then this is a no-op.
*/
-static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
+void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
{
- u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
- struct buffer_head *bh;
+ if (sdp->sd_log_bio) {
+ atomic_inc(&sdp->sd_log_in_flight);
+ submit_bio(rw, sdp->sd_log_bio);
+ sdp->sd_log_bio = NULL;
+ }
+}
- bh = sb_getblk(sdp->sd_vfs, blkno);
- lock_buffer(bh);
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- gfs2_log_incr_head(sdp);
- atomic_inc(&sdp->sd_log_in_flight);
- bh->b_private = sdp;
- bh->b_end_io = gfs2_log_write_endio;
+/**
+ * gfs2_log_alloc_bio - Allocate a new bio for log writing
+ * @sdp: The superblock
+ * @blkno: The next device block number we want to write to
+ *
+ * This should never be called when there is a cached bio in the
+ * super block. When it returns, there will be a cached bio in the
+ * super block which will have as many bio_vecs as the device is
+ * happy to handle.
+ *
+ * Returns: Newly allocated bio
+ */
- return bh;
+static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ unsigned nrvecs = bio_get_nr_vecs(sb->s_bdev);
+ struct bio *bio;
+
+ BUG_ON(sdp->sd_log_bio);
+
+ while (1) {
+ bio = bio_alloc(GFP_NOIO, nrvecs);
+ if (likely(bio))
+ break;
+ nrvecs = max(nrvecs/2, 1U);
+ }
+
+ bio->bi_sector = blkno * (sb->s_blocksize >> 9);
+ bio->bi_bdev = sb->s_bdev;
+ bio->bi_end_io = gfs2_end_log_write;
+ bio->bi_private = sdp;
+
+ sdp->sd_log_bio = bio;
+
+ return bio;
}
/**
- * gfs2_fake_write_endio -
- * @bh: The buffer head
- * @uptodate: The I/O Status
+ * gfs2_log_get_bio - Get cached log bio, or allocate a new one
+ * @sdp: The superblock
+ * @blkno: The device block number we want to write to
+ *
+ * If there is a cached bio, then if the next block number is sequential
+ * with the previous one, return it, otherwise flush the bio to the
+ * device. If there is not a cached bio, or we just flushed it, then
+ * allocate a new one.
*
+ * Returns: The bio to use for log writes
*/
-static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
+static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
{
- struct buffer_head *real_bh = bh->b_private;
- struct gfs2_bufdata *bd = real_bh->b_private;
- struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;
+ struct bio *bio = sdp->sd_log_bio;
+ u64 nblk;
+
+ if (bio) {
+ nblk = bio->bi_sector + bio_sectors(bio);
+ nblk >>= sdp->sd_fsb2bb_shift;
+ if (blkno == nblk)
+ return bio;
+ gfs2_log_flush_bio(sdp, WRITE);
+ }
- end_buffer_write_sync(bh, uptodate);
- mempool_free(bh, gfs2_bh_pool);
- unlock_buffer(real_bh);
- brelse(real_bh);
- if (atomic_dec_and_test(&sdp->sd_log_in_flight))
- wake_up(&sdp->sd_log_flush_wait);
+ return gfs2_log_alloc_bio(sdp, blkno);
}
+
/**
- * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
+ * gfs2_log_write - write to log
* @sdp: the filesystem
- * @data: the data the buffer_head should point to
+ * @page: the page to write
+ * @size: the size of the data to write
+ * @offset: the offset within the page
*
- * Returns: the log buffer descriptor
+ * Try and add the page segment to the current bio. If that fails,
+ * submit the current bio to the device and create a new one, and
+ * then add the page segment to that.
*/
-static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
- struct buffer_head *real)
+static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
+ unsigned size, unsigned offset)
{
- u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
- struct buffer_head *bh;
+ u64 blkno = gfs2_log_bmap(sdp);
+ struct bio *bio;
+ int ret;
+
+ bio = gfs2_log_get_bio(sdp, blkno);
+ ret = bio_add_page(bio, page, size, offset);
+ if (ret == 0) {
+ gfs2_log_flush_bio(sdp, WRITE);
+ bio = gfs2_log_alloc_bio(sdp, blkno);
+ ret = bio_add_page(bio, page, size, offset);
+ WARN_ON(ret == 0);
+ }
+}
+
+/**
+ * gfs2_log_write_bh - write a buffer's content to the log
+ * @sdp: The super block
+ * @bh: The buffer pointing to the in-place location
+ *
+ * This writes the content of the buffer to the next available location
+ * in the log. The buffer will be unlocked once the i/o to the log has
+ * completed.
+ */
- bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
- atomic_set(&bh->b_count, 1);
- bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
- set_bh_page(bh, real->b_page, bh_offset(real));
- bh->b_blocknr = blkno;
- bh->b_size = sdp->sd_sb.sb_bsize;
- bh->b_bdev = sdp->sd_vfs->s_bdev;
- bh->b_private = real;
- bh->b_end_io = gfs2_fake_write_endio;
+static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
+{
+ gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh));
+}
- gfs2_log_incr_head(sdp);
- atomic_inc(&sdp->sd_log_in_flight);
+/**
+ * gfs2_log_write_page - write one block stored in a page, into the log
+ * @sdp: The superblock
+ * @page: The struct page
+ *
+ * This writes the first block-sized part of the page into the log. Note
+ * that the page must have been allocated from the gfs2_page_pool mempool
+ * and that after this has been called, ownership has been transferred and
+ * the page may be freed at any time.
+ */
- return bh;
+void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ gfs2_log_write(sdp, page, sb->s_blocksize, 0);
}
-static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
+static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
+ u32 ld_length, u32 ld_data1)
{
- struct buffer_head *bh = gfs2_log_get_buf(sdp);
- struct gfs2_log_descriptor *ld = bh_log_desc(bh);
+ struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ struct gfs2_log_descriptor *ld = page_address(page);
+ clear_page(ld);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(ld_type);
- ld->ld_length = 0;
- ld->ld_data1 = 0;
+ ld->ld_length = cpu_to_be32(ld_length);
+ ld->ld_data1 = cpu_to_be32(ld_data1);
ld->ld_data2 = 0;
- memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
- return bh;
+ return page;
}
-static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
- struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_meta_header *mh;
struct gfs2_trans *tr;
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
- if (!list_empty(&bd->bd_list_tr))
- goto out;
tr = current->journal_info;
tr->tr_touched = 1;
- tr->tr_num_buf++;
- list_add(&bd->bd_list_tr, &tr->tr_list_buf);
- if (!list_empty(&le->le_list))
+ if (!list_empty(&bd->bd_list))
goto out;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
@@ -276,62 +407,86 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
mh->__pad0 = cpu_to_be64(0);
mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
sdp->sd_log_num_buf++;
- list_add(&le->le_list, &sdp->sd_log_le_buf);
+ list_add(&bd->bd_list, &sdp->sd_log_le_buf);
tr->tr_num_buf_new++;
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
-static void buf_lo_before_commit(struct gfs2_sbd *sdp)
+static void gfs2_check_magic(struct buffer_head *bh)
+{
+ void *kaddr;
+ __be32 *ptr;
+
+ clear_buffer_escaped(bh);
+ kaddr = kmap_atomic(bh->b_page);
+ ptr = kaddr + bh_offset(bh);
+ if (*ptr == cpu_to_be32(GFS2_MAGIC))
+ set_buffer_escaped(bh);
+ kunmap_atomic(kaddr);
+}
+
+static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
+ unsigned int total, struct list_head *blist,
+ bool is_databuf)
{
- struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
- unsigned int total;
- unsigned int limit;
+ struct page *page;
unsigned int num;
unsigned n;
__be64 *ptr;
- limit = buf_limit(sdp);
- /* for 4k blocks, limit = 503 */
-
gfs2_log_lock(sdp);
- total = sdp->sd_log_num_buf;
- bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
+ bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
while(total) {
num = total;
if (total > limit)
num = limit;
gfs2_log_unlock(sdp);
- bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
+ page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num);
+ ld = page_address(page);
gfs2_log_lock(sdp);
- ld = bh_log_desc(bh);
- ptr = bh_log_ptr(bh);
- ld->ld_length = cpu_to_be32(num + 1);
- ld->ld_data1 = cpu_to_be32(num);
+ ptr = (__be64 *)(ld + 1);
n = 0;
- list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
- bd_le.le_list) {
+ list_for_each_entry_continue(bd1, blist, bd_list) {
*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+ if (is_databuf) {
+ gfs2_check_magic(bd1->bd_bh);
+ *ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
+ }
if (++n >= num)
break;
}
gfs2_log_unlock(sdp);
- submit_bh(WRITE_SYNC, bh);
+ gfs2_log_write_page(sdp, page);
gfs2_log_lock(sdp);
n = 0;
- list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
- bd_le.le_list) {
+ list_for_each_entry_continue(bd2, blist, bd_list) {
get_bh(bd2->bd_bh);
gfs2_log_unlock(sdp);
lock_buffer(bd2->bd_bh);
- bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
- submit_bh(WRITE_SYNC, bh);
+
+ if (buffer_escaped(bd2->bd_bh)) {
+ void *kaddr;
+ page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ ptr = page_address(page);
+ kaddr = kmap_atomic(bd2->bd_bh->b_page);
+ memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
+ bd2->bd_bh->b_size);
+ kunmap_atomic(kaddr);
+ *(__be32 *)ptr = 0;
+ clear_buffer_escaped(bd2->bd_bh);
+ unlock_buffer(bd2->bd_bh);
+ brelse(bd2->bd_bh);
+ gfs2_log_write_page(sdp, page);
+ } else {
+ gfs2_log_write_bh(sdp, bd2->bd_bh);
+ }
gfs2_log_lock(sdp);
if (++n >= num)
break;
@@ -343,14 +498,22 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
gfs2_log_unlock(sdp);
}
+static void buf_lo_before_commit(struct gfs2_sbd *sdp)
+{
+ unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
+
+ gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf,
+ &sdp->sd_log_le_buf, 0);
+}
+
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
struct list_head *head = &sdp->sd_log_le_buf;
struct gfs2_bufdata *bd;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
- list_del_init(&bd->bd_le.le_list);
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ list_del_init(&bd->bd_list);
sdp->sd_log_num_buf--;
gfs2_unpin(sdp, bd->bd_bh, ai);
@@ -437,9 +600,8 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}
-static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
- struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_glock *gl = bd->bd_gl;
struct gfs2_trans *tr;
@@ -449,48 +611,48 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
set_bit(GLF_LFLUSH, &gl->gl_flags);
- list_add(&le->le_list, &sdp->sd_log_le_revoke);
+ list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
}
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
struct gfs2_log_descriptor *ld;
struct gfs2_meta_header *mh;
- struct buffer_head *bh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_bufdata *bd;
+ struct page *page;
+ unsigned int length;
if (!sdp->sd_log_num_revoke)
return;
- bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
- ld = bh_log_desc(bh);
- ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
- sizeof(u64)));
- ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
+ length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
+ page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
+ ld = page_address(page);
offset = sizeof(struct gfs2_log_descriptor);
- list_for_each_entry(bd, head, bd_le.le_list) {
+ list_for_each_entry(bd, head, bd_list) {
sdp->sd_log_num_revoke--;
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
- submit_bh(WRITE_SYNC, bh);
- bh = gfs2_log_get_buf(sdp);
- mh = (struct gfs2_meta_header *)bh->b_data;
+ gfs2_log_write_page(sdp, page);
+ page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+ mh = page_address(page);
+ clear_page(mh);
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
offset = sizeof(struct gfs2_meta_header);
}
- *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
+ *(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
- submit_bh(WRITE_SYNC, bh);
+ gfs2_log_write_page(sdp, page);
}
static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
@@ -500,8 +662,8 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
struct gfs2_glock *gl;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
- list_del_init(&bd->bd_le.le_list);
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ list_del_init(&bd->bd_list);
gl = bd->bd_gl;
atomic_dec(&gl->gl_revokes);
clear_bit(GLF_LFLUSH, &gl->gl_flags);
@@ -604,108 +766,33 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
* blocks, which isn't an enormous overhead but twice as much as
* for normal metadata blocks.
*/
-static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
- struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr = current->journal_info;
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
lock_buffer(bd->bd_bh);
gfs2_log_lock(sdp);
- if (tr) {
- if (!list_empty(&bd->bd_list_tr))
- goto out;
+ if (tr)
tr->tr_touched = 1;
- if (gfs2_is_jdata(ip)) {
- tr->tr_num_buf++;
- list_add(&bd->bd_list_tr, &tr->tr_list_buf);
- }
- }
- if (!list_empty(&le->le_list))
+ if (!list_empty(&bd->bd_list))
goto out;
-
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
if (gfs2_is_jdata(ip)) {
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_databuf_new++;
sdp->sd_log_num_databuf++;
- list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
+ list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
} else {
- list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
+ list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
}
out:
gfs2_log_unlock(sdp);
unlock_buffer(bd->bd_bh);
}
-static void gfs2_check_magic(struct buffer_head *bh)
-{
- void *kaddr;
- __be32 *ptr;
-
- clear_buffer_escaped(bh);
- kaddr = kmap_atomic(bh->b_page);
- ptr = kaddr + bh_offset(bh);
- if (*ptr == cpu_to_be32(GFS2_MAGIC))
- set_buffer_escaped(bh);
- kunmap_atomic(kaddr);
-}
-
-static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
- struct list_head *list, struct list_head *done,
- unsigned int n)
-{
- struct buffer_head *bh1;
- struct gfs2_log_descriptor *ld;
- struct gfs2_bufdata *bd;
- __be64 *ptr;
-
- if (!bh)
- return;
-
- ld = bh_log_desc(bh);
- ld->ld_length = cpu_to_be32(n + 1);
- ld->ld_data1 = cpu_to_be32(n);
-
- ptr = bh_log_ptr(bh);
-
- get_bh(bh);
- submit_bh(WRITE_SYNC, bh);
- gfs2_log_lock(sdp);
- while(!list_empty(list)) {
- bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
- list_move_tail(&bd->bd_le.le_list, done);
- get_bh(bd->bd_bh);
- while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
- gfs2_log_incr_head(sdp);
- ptr += 2;
- }
- gfs2_log_unlock(sdp);
- lock_buffer(bd->bd_bh);
- if (buffer_escaped(bd->bd_bh)) {
- void *kaddr;
- bh1 = gfs2_log_get_buf(sdp);
- kaddr = kmap_atomic(bd->bd_bh->b_page);
- memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
- bh1->b_size);
- kunmap_atomic(kaddr);
- *(__be32 *)bh1->b_data = 0;
- clear_buffer_escaped(bd->bd_bh);
- unlock_buffer(bd->bd_bh);
- brelse(bd->bd_bh);
- } else {
- bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
- }
- submit_bh(WRITE_SYNC, bh1);
- gfs2_log_lock(sdp);
- ptr += 2;
- }
- gfs2_log_unlock(sdp);
- brelse(bh);
-}
-
/**
* databuf_lo_before_commit - Scan the data buffers, writing as we go
*
@@ -713,37 +800,10 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
- struct gfs2_bufdata *bd = NULL;
- struct buffer_head *bh = NULL;
- unsigned int n = 0;
- __be64 *ptr = NULL, *end = NULL;
- LIST_HEAD(processed);
- LIST_HEAD(in_progress);
+ unsigned int limit = buf_limit(sdp) / 2;
- gfs2_log_lock(sdp);
- while (!list_empty(&sdp->sd_log_le_databuf)) {
- if (ptr == end) {
- gfs2_log_unlock(sdp);
- gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
- n = 0;
- bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
- ptr = bh_log_ptr(bh);
- end = bh_ptr_end(bh) - 1;
- gfs2_log_lock(sdp);
- continue;
- }
- bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
- list_move_tail(&bd->bd_le.le_list, &in_progress);
- gfs2_check_magic(bd->bd_bh);
- *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
- *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
- n++;
- }
- gfs2_log_unlock(sdp);
- gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
- gfs2_log_lock(sdp);
- list_splice(&processed, &sdp->sd_log_le_databuf);
- gfs2_log_unlock(sdp);
+ gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf,
+ &sdp->sd_log_le_databuf, 1);
}
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
@@ -822,8 +882,8 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
struct gfs2_bufdata *bd;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
- list_del_init(&bd->bd_le.le_list);
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ list_del_init(&bd->bd_list);
sdp->sd_log_num_databuf--;
gfs2_unpin(sdp, bd->bd_bh, ai);
}
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 3c0b2737658a..954a330585f4 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,6 +27,8 @@ extern const struct gfs2_log_operations gfs2_rg_lops;
extern const struct gfs2_log_operations gfs2_databuf_lops;
extern const struct gfs2_log_operations *gfs2_log_ops[];
+extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
+extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
@@ -44,17 +46,17 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
return limit;
}
-static inline void lops_init_le(struct gfs2_log_element *le,
+static inline void lops_init_le(struct gfs2_bufdata *bd,
const struct gfs2_log_operations *lops)
{
- INIT_LIST_HEAD(&le->le_list);
- le->le_ops = lops;
+ INIT_LIST_HEAD(&bd->bd_list);
+ bd->bd_ops = lops;
}
-static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
+static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
- if (le->le_ops->lo_add)
- le->le_ops->lo_add(sdp, le);
+ if (bd->bd_ops->lo_add)
+ bd->bd_ops->lo_add(sdp, bd);
}
static inline void lops_before_commit(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 754426b1e52c..6cdb0f2a1b09 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -70,16 +70,6 @@ static void gfs2_init_gl_aspace_once(void *foo)
address_space_init_once(mapping);
}
-static void *gfs2_bh_alloc(gfp_t mask, void *data)
-{
- return alloc_buffer_head(mask);
-}
-
-static void gfs2_bh_free(void *ptr, void *data)
-{
- return free_buffer_head(ptr);
-}
-
/**
* init_gfs2_fs - Register GFS2 as a filesystem
*
@@ -143,6 +133,12 @@ static int __init init_gfs2_fs(void)
if (!gfs2_quotad_cachep)
goto fail;
+ gfs2_rsrv_cachep = kmem_cache_create("gfs2_mblk",
+ sizeof(struct gfs2_blkreserv),
+ 0, 0, NULL);
+ if (!gfs2_rsrv_cachep)
+ goto fail;
+
register_shrinker(&qd_shrinker);
error = register_filesystem(&gfs2_fs_type);
@@ -164,8 +160,8 @@ static int __init init_gfs2_fs(void)
if (!gfs2_control_wq)
goto fail_recovery;
- gfs2_bh_pool = mempool_create(1024, gfs2_bh_alloc, gfs2_bh_free, NULL);
- if (!gfs2_bh_pool)
+ gfs2_page_pool = mempool_create_page_pool(64, 0);
+ if (!gfs2_page_pool)
goto fail_control;
gfs2_register_debugfs();
@@ -186,6 +182,9 @@ fail:
unregister_shrinker(&qd_shrinker);
gfs2_glock_exit();
+ if (gfs2_rsrv_cachep)
+ kmem_cache_destroy(gfs2_rsrv_cachep);
+
if (gfs2_quotad_cachep)
kmem_cache_destroy(gfs2_quotad_cachep);
@@ -225,7 +224,8 @@ static void __exit exit_gfs2_fs(void)
rcu_barrier();
- mempool_destroy(gfs2_bh_pool);
+ mempool_destroy(gfs2_page_pool);
+ kmem_cache_destroy(gfs2_rsrv_cachep);
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 181586e673f9..6c1e5d1c404a 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -293,11 +293,10 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
bd->bd_bh = bh;
bd->bd_gl = gl;
- INIT_LIST_HEAD(&bd->bd_list_tr);
if (meta)
- lops_init_le(&bd->bd_le, &gfs2_buf_lops);
+ lops_init_le(bd, &gfs2_buf_lops);
else
- lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
+ lops_init_le(bd, &gfs2_databuf_lops);
bh->b_private = bd;
if (meta)
@@ -313,7 +312,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
atomic_dec(&sdp->sd_log_pinned);
- list_del_init(&bd->bd_le.le_list);
+ list_del_init(&bd->bd_list);
if (meta) {
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
@@ -375,33 +374,24 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
* @ip: The GFS2 inode
* @height: The level of this buf in the metadata (indir addr) tree (if any)
* @num: The block number (device relative) of the buffer
- * @new: Non-zero if we may create a new buffer
* @bhp: the buffer is returned here
*
* Returns: errno
*/
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
- int new, struct buffer_head **bhp)
+ struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_glock *gl = ip->i_gl;
struct buffer_head *bh;
int ret = 0;
+ u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
- if (new) {
- BUG_ON(height == 0);
- bh = gfs2_meta_new(gl, num);
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
- gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
- } else {
- u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
- ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
- if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
- brelse(bh);
- ret = -EIO;
- }
+ ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh);
+ if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) {
+ brelse(bh);
+ ret = -EIO;
}
*bhp = bh;
return ret;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 22c526593131..c30973b07a7c 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -65,12 +65,12 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
- int new, struct buffer_head **bhp);
+ struct buffer_head **bhp);
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
struct buffer_head **bhp)
{
- return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
+ return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, bhp);
}
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6f3a18f9e176..c5871ae40561 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,7 +99,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
atomic_set(&sdp->sd_log_pinned, 0);
INIT_LIST_HEAD(&sdp->sd_log_le_buf);
INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
- INIT_LIST_HEAD(&sdp->sd_log_le_rg);
INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6019da3dcaed..b97178e7d397 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -652,7 +652,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
}
memset(&q, 0, sizeof(struct gfs2_quota));
- err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
+ err = gfs2_internal_read(ip, (char *)&q, &loc, sizeof(q));
if (err < 0)
return err;
@@ -744,7 +744,7 @@ get_a_page:
i_size_write(inode, size);
inode->i_mtime = inode->i_atime = CURRENT_TIME;
mark_inode_dirty(inode);
- return err;
+ return 0;
unlock_out:
unlock_page(page);
@@ -852,7 +852,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
memset(&q, 0, sizeof(struct gfs2_quota));
pos = qd2offset(qd);
- error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q));
+ error = gfs2_internal_read(ip, (char *)&q, &pos, sizeof(q));
if (error < 0)
return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3df65c9ab73b..f74fb9bd1973 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -70,15 +70,15 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
/**
* gfs2_setbit - Set a bit in the bitmaps
- * @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
+ * @rgd: the resource group descriptor
+ * @buf2: the clone buffer that holds the bitmaps
+ * @bi: the bitmap structure
* @block: the block to set
* @new_state: the new state of the block
*
*/
-static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
- unsigned char *buf2, unsigned int offset,
+static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
struct gfs2_bitmap *bi, u32 block,
unsigned char new_state)
{
@@ -86,8 +86,8 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
unsigned int buflen = bi->bi_len;
const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
- byte1 = buf1 + offset + (block / GFS2_NBBY);
- end = buf1 + offset + buflen;
+ byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
+ end = bi->bi_bh->b_data + bi->bi_offset + buflen;
BUG_ON(byte1 >= end);
@@ -110,7 +110,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
*byte1 ^= (cur_state ^ new_state) << bit;
if (buf2) {
- byte2 = buf2 + offset + (block / GFS2_NBBY);
+ byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
*byte2 ^= (cur_state ^ new_state) << bit;
}
@@ -118,6 +118,7 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
/**
* gfs2_testbit - test a bit in the bitmaps
+ * @rgd: the resource group descriptor
* @buffer: the buffer that holds the bitmaps
* @buflen: the length (in bytes) of the buffer
* @block: the block to read
@@ -179,7 +180,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
/**
* gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
* a block in a given allocation state.
- * @buffer: the buffer that holds the bitmaps
+ * @buf: the buffer that holds the bitmaps
* @len: the length (in bytes) of the buffer
* @goal: start search at this block's bit-pair (within @buffer)
* @state: GFS2_BLKST_XXX the state of the block we're looking for.
@@ -231,6 +232,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
/**
* gfs2_bitcount - count the number of bits in a certain state
+ * @rgd: the resource group descriptor
* @buffer: the buffer that holds the bitmaps
* @buflen: the length (in bytes) of the buffer
* @state: the state of the block we're looking for
@@ -264,7 +266,6 @@ static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,
/**
* gfs2_rgrp_verify - Verify that a resource group is consistent
- * @sdp: the filesystem
* @rgd: the rgrp
*
*/
@@ -322,7 +323,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
/**
* gfs2_blk2rgrpd - Find resource group for a given data/meta block number
* @sdp: The GFS2 superblock
- * @n: The data block number
+ * @blk: The data block number
+ * @exact: True if this needs to be an exact match
*
* Returns: The resource group, or NULL if not found
*/
@@ -380,7 +382,7 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
/**
* gfs2_rgrpd_get_next - get the next RG
- * @rgd: A RG
+ * @rgd: the resource group descriptor
*
* Returns: The next rgrp
*/
@@ -529,6 +531,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
/**
* gfs2_ri_total - Total up the file system space, according to the rindex.
+ * @sdp: the filesystem
*
*/
u64 gfs2_ri_total(struct gfs2_sbd *sdp)
@@ -537,16 +540,14 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
struct inode *inode = sdp->sd_rindex;
struct gfs2_inode *ip = GFS2_I(inode);
char buf[sizeof(struct gfs2_rindex)];
- struct file_ra_state ra_state;
int error, rgrps;
- file_ra_state_init(&ra_state, inode->i_mapping);
for (rgrps = 0;; rgrps++) {
loff_t pos = rgrps * sizeof(struct gfs2_rindex);
if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
break;
- error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+ error = gfs2_internal_read(ip, buf, &pos,
sizeof(struct gfs2_rindex));
if (error != sizeof(struct gfs2_rindex))
break;
@@ -582,13 +583,12 @@ static int rgd_insert(struct gfs2_rgrpd *rgd)
/**
* read_rindex_entry - Pull in a new resource index entry from the disk
- * @gl: The glock covering the rindex inode
+ * @ip: Pointer to the rindex inode
*
* Returns: 0 on success, > 0 on EOF, error code otherwise
*/
-static int read_rindex_entry(struct gfs2_inode *ip,
- struct file_ra_state *ra_state)
+static int read_rindex_entry(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
@@ -599,7 +599,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
if (pos >= i_size_read(&ip->i_inode))
return 1;
- error = gfs2_internal_read(ip, ra_state, (char *)&buf, &pos,
+ error = gfs2_internal_read(ip, (char *)&buf, &pos,
sizeof(struct gfs2_rindex));
if (error != sizeof(struct gfs2_rindex))
@@ -655,13 +655,10 @@ fail:
static int gfs2_ri_update(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct inode *inode = &ip->i_inode;
- struct file_ra_state ra_state;
int error;
- file_ra_state_init(&ra_state, inode->i_mapping);
do {
- error = read_rindex_entry(ip, &ra_state);
+ error = read_rindex_entry(ip);
} while (error == 0);
if (error < 0)
@@ -741,7 +738,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
/**
* gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
- * @rgd: the struct gfs2_rgrpd describing the RG to read in
+ * @gh: The glock holder for the resource group
*
* Read in all of a Resource Group's header and bitmap blocks.
* Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -801,7 +798,7 @@ fail:
/**
* gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
- * @rgd: the struct gfs2_rgrpd describing the RG to read in
+ * @gh: The glock holder for the resource group
*
*/
@@ -1002,11 +999,13 @@ struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
* Returns: the struct gfs2_qadata
*/
-static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip)
+static int gfs2_blkrsv_get(struct gfs2_inode *ip)
{
BUG_ON(ip->i_res != NULL);
- ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS);
- return ip->i_res;
+ ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+ if (!ip->i_res)
+ return -ENOMEM;
+ return 0;
}
/**
@@ -1038,6 +1037,8 @@ static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
/**
* try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
* @rgd: The rgrp
+ * @last_unlinked: block address of the last dinode we unlinked
+ * @skip: block address we should explicitly not unlink
*
* Returns: 0 if no error
* The inode, if one has been found, in inode.
@@ -1102,7 +1103,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
/**
* get_local_rgrp - Choose and lock a rgrp for allocation
* @ip: the inode to reserve space for
- * @rgp: the chosen and locked rgrp
+ * @last_unlinked: the last unlinked block
*
* Try to acquire rgrp in way which avoids contending with others.
*
@@ -1164,13 +1165,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
static void gfs2_blkrsv_put(struct gfs2_inode *ip)
{
BUG_ON(ip->i_res == NULL);
- kfree(ip->i_res);
+ kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
ip->i_res = NULL;
}
/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
+ * @requested: the number of blocks to be reserved
*
* Returns: errno
*/
@@ -1179,14 +1181,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_blkreserv *rs;
- int error = 0;
+ int error;
u64 last_unlinked = NO_BLOCK;
int tries = 0;
- rs = gfs2_blkrsv_get(ip);
- if (!rs)
- return -ENOMEM;
+ error = gfs2_blkrsv_get(ip);
+ if (error)
+ return error;
+ rs = ip->i_res;
rs->rs_requested = requested;
if (gfs2_assert_warn(sdp, requested)) {
error = -EINVAL;
@@ -1268,7 +1271,6 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
* @rgd: the resource group descriptor
* @goal: the goal block within the RG (start here to search for avail block)
* @state: GFS2_BLKST_XXX the before-allocation state to find
- * @dinode: TRUE if the first block we allocate is for a dinode
* @rbi: address of the pointer to the bitmap containing the block found
*
* Walk rgrp's bitmap to find bits that represent a block in @state.
@@ -1282,13 +1284,12 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
* Returns: the block number found relative to the bitmap rbi
*/
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
- unsigned char state,
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
struct gfs2_bitmap **rbi)
{
struct gfs2_bitmap *bi = NULL;
const u32 length = rgd->rd_length;
- u32 blk = BFITNOENT;
+ u32 biblk = BFITNOENT;
unsigned int buf, x;
const u8 *buffer = NULL;
@@ -1325,8 +1326,8 @@ do_search:
if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
buffer = bi->bi_clone + bi->bi_offset;
- blk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
- if (blk != BFITNOENT)
+ biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+ if (biblk != BFITNOENT)
break;
if ((goal == 0) && (state == GFS2_BLKST_FREE))
@@ -1339,10 +1340,10 @@ skip:
goal = 0;
}
- if (blk != BFITNOENT)
+ if (biblk != BFITNOENT)
*rbi = bi;
- return blk;
+ return biblk;
}
/**
@@ -1367,8 +1368,8 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
*n = 0;
buffer = bi->bi_bh->b_data + bi->bi_offset;
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
- gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
- bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+ gfs2_setbit(rgd, bi->bi_clone, bi, blk,
+ dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
(*n)++;
goal = blk;
while (*n < elen) {
@@ -1378,8 +1379,7 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
GFS2_BLKST_FREE)
break;
- gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
- bi, goal, GFS2_BLKST_USED);
+ gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
(*n)++;
}
blk = gfs2_bi2rgd_blk(bi, blk);
@@ -1436,8 +1436,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
bi->bi_len);
}
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
- gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
- bi, buf_blk, new_state);
+ gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
}
return rgd;
@@ -1557,7 +1556,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
ip->i_inode.i_gid);
rgd->rd_free_clone -= *nblocks;
- trace_gfs2_block_alloc(ip, block, *nblocks,
+ trace_gfs2_block_alloc(ip, rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1584,7 +1583,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1622,7 +1621,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
- trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
+ trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
}
@@ -1652,7 +1651,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
- trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
+ trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
@@ -1752,7 +1751,6 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
* and initialize an array of glock holders for them
* @rlist: the list of resource groups
* @state: the lock state to acquire the RG lock in
- * @flags: the modifier flags for the holder structures
*
* FIXME: Don't use NOFAIL
*
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index dfa89cd75534..1b8b81588199 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -457,10 +457,10 @@ TRACE_EVENT(gfs2_bmap,
/* Keep track of blocks as they are allocated/freed */
TRACE_EVENT(gfs2_block_alloc,
- TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
- u8 block_state),
+ TP_PROTO(const struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 block, unsigned len, u8 block_state),
- TP_ARGS(ip, block, len, block_state),
+ TP_ARGS(ip, rgd, block, len, block_state),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -468,6 +468,8 @@ TRACE_EVENT(gfs2_block_alloc,
__field( u64, inum )
__field( u32, len )
__field( u8, block_state )
+ __field( u64, rd_addr )
+ __field( u32, rd_free_clone )
),
TP_fast_assign(
@@ -476,14 +478,18 @@ TRACE_EVENT(gfs2_block_alloc,
__entry->inum = ip->i_no_addr;
__entry->len = len;
__entry->block_state = block_state;
+ __entry->rd_addr = rgd->rd_addr;
+ __entry->rd_free_clone = rgd->rd_free_clone;
),
- TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum,
(unsigned long long)__entry->start,
(unsigned long)__entry->len,
- block_state_name(__entry->block_state))
+ block_state_name(__entry->block_state),
+ (unsigned long long)__entry->rd_addr,
+ __entry->rd_free_clone)
);
#endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 86ac75d99d31..ad3e2fb763d7 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,8 +50,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
if (revokes)
tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
sizeof(u64));
- INIT_LIST_HEAD(&tr->tr_list_buf);
-
gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -93,10 +91,21 @@ static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
up_read(&sdp->sd_log_flush_lock);
}
+static void gfs2_print_trans(const struct gfs2_trans *tr)
+{
+ print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
+ printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n",
+ tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched);
+ printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+ tr->tr_num_buf_new, tr->tr_num_buf_rm,
+ tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
+ tr->tr_num_revoke, tr->tr_num_revoke_rm);
+}
+
void gfs2_trans_end(struct gfs2_sbd *sdp)
{
struct gfs2_trans *tr = current->journal_info;
-
+ s64 nbuf;
BUG_ON(!tr);
current->journal_info = NULL;
@@ -110,16 +119,13 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
return;
}
- if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
- fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
- tr->tr_num_buf, tr->tr_blocks);
- print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
- }
- if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
- fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
- tr->tr_num_revoke, tr->tr_revokes);
- print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
- }
+ nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
+ nbuf -= tr->tr_num_buf_rm;
+ nbuf -= tr->tr_num_databuf_rm;
+
+ if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
+ (tr->tr_num_revoke <= tr->tr_revokes)))
+ gfs2_print_trans(tr);
gfs2_log_commit(sdp, tr);
if (tr->tr_t_gh.gh_gl) {
@@ -152,16 +158,16 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
gfs2_attach_bufdata(gl, bh, meta);
bd = bh->b_private;
}
- lops_add(sdp, &bd->bd_le);
+ lops_add(sdp, bd);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
- BUG_ON(!list_empty(&bd->bd_le.le_list));
+ BUG_ON(!list_empty(&bd->bd_list));
BUG_ON(!list_empty(&bd->bd_ail_st_list));
BUG_ON(!list_empty(&bd->bd_ail_gl_list));
- lops_init_le(&bd->bd_le, &gfs2_revoke_lops);
- lops_add(sdp, &bd->bd_le);
+ lops_init_le(bd, &gfs2_revoke_lops);
+ lops_add(sdp, bd);
}
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
@@ -171,9 +177,9 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
unsigned int n = len;
gfs2_log_lock(sdp);
- list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) {
+ list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_list) {
if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) {
- list_del_init(&bd->bd_le.le_list);
+ list_del_init(&bd->bd_list);
gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
sdp->sd_log_num_revoke--;
kmem_cache_free(gfs2_bufdata_cachep, bd);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 9e7765e8e7b0..f00d7c5744f6 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -25,7 +25,8 @@ struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
-mempool_t *gfs2_bh_pool __read_mostly;
+struct kmem_cache *gfs2_rsrv_cachep __read_mostly;
+mempool_t *gfs2_page_pool __read_mostly;
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index a4ce76c67dbb..3586b0dd6aa7 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -152,7 +152,8 @@ extern struct kmem_cache *gfs2_inode_cachep;
extern struct kmem_cache *gfs2_bufdata_cachep;
extern struct kmem_cache *gfs2_rgrpd_cachep;
extern struct kmem_cache *gfs2_quotad_cachep;
-extern mempool_t *gfs2_bh_pool;
+extern struct kmem_cache *gfs2_rsrv_cachep;
+extern mempool_t *gfs2_page_pool;
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
unsigned int *p)
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 4dfbfec357e8..ec2a9c23f0c9 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -366,6 +366,10 @@ int hfsplus_rename_cat(u32 cnid,
err = hfs_brec_find(&src_fd);
if (err)
goto out;
+ if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
src_fd.entrylength);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 88e155f895c6..26b53fb09f68 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -150,6 +150,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
filp->f_pos++;
/* fall through */
case 1:
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
@@ -181,6 +186,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = -EIO;
goto out;
}
+
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
type = be16_to_cpu(entry.type);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 28cf06e4ec84..001ef01d2fe2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -485,6 +485,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
inode->i_fop = &simple_dir_operations;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
+ lockdep_annotate_inode_mutex_key(inode);
}
return inode;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 806525a7269c..840f70f50792 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -723,7 +723,7 @@ start_journal_io:
if (commit_transaction->t_need_data_flush &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -859,7 +859,7 @@ wait_for_iobuf:
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
journal->j_flags & JBD2_BARRIER) {
- blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
}
if (err)
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index ad271c70aa25..5a2dec2b064c 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -234,8 +234,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
return 0;
jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
- spin_lock(&c->erase_completion_lock);
mutex_lock(&c->alloc_sem);
+ spin_lock(&c->erase_completion_lock);
}
/* First, work out which block we're garbage-collecting */
diff --git a/fs/libfs.c b/fs/libfs.c
index 18d08f5db53a..f86ec27a4230 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -68,7 +68,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
int dcache_dir_open(struct inode *inode, struct file *file)
{
- static struct qstr cursor_name = {.len = 1, .name = "."};
+ static struct qstr cursor_name = QSTR_INIT(".", 1);
file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
@@ -225,7 +225,7 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
struct inode *root;
- struct qstr d_name = {.name = name, .len = strlen(name)};
+ struct qstr d_name = QSTR_INIT(name, strlen(name));
if (IS_ERR(s))
return ERR_CAST(s);
diff --git a/fs/namei.c b/fs/namei.c
index 0062dd17eb55..f9e883c1b856 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -116,47 +116,37 @@
* POSIX.1 2.4: an empty pathname is invalid (ENOENT).
* PATH_MAX includes the nul terminator --RR.
*/
-static int do_getname(const char __user *filename, char *page)
-{
- int retval;
- unsigned long len = PATH_MAX;
-
- if (!segment_eq(get_fs(), KERNEL_DS)) {
- if ((unsigned long) filename >= TASK_SIZE)
- return -EFAULT;
- if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
- len = TASK_SIZE - (unsigned long) filename;
- }
-
- retval = strncpy_from_user(page, filename, len);
- if (retval > 0) {
- if (retval < len)
- return 0;
- return -ENAMETOOLONG;
- } else if (!retval)
- retval = -ENOENT;
- return retval;
-}
-
static char *getname_flags(const char __user *filename, int flags, int *empty)
{
- char *result = __getname();
- int retval;
+ char *result = __getname(), *err;
+ int len;
- if (!result)
+ if (unlikely(!result))
return ERR_PTR(-ENOMEM);
- retval = do_getname(filename, result);
- if (retval < 0) {
- if (retval == -ENOENT && empty)
+ len = strncpy_from_user(result, filename, PATH_MAX);
+ err = ERR_PTR(len);
+ if (unlikely(len < 0))
+ goto error;
+
+ /* The empty path is special. */
+ if (unlikely(!len)) {
+ if (empty)
*empty = 1;
- if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
- __putname(result);
- return ERR_PTR(retval);
- }
+ err = ERR_PTR(-ENOENT);
+ if (!(flags & LOOKUP_EMPTY))
+ goto error;
+ }
+
+ err = ERR_PTR(-ENAMETOOLONG);
+ if (likely(len < PATH_MAX)) {
+ audit_getname(result);
+ return result;
}
- audit_getname(result);
- return result;
+
+error:
+ __putname(result);
+ return err;
}
char *getname(const char __user * filename)
@@ -1154,12 +1144,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
- *inode = nd->inode;
- dentry = __d_lookup_rcu(parent, name, &seq, inode);
+ dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
if (!dentry)
goto unlazy;
- /* Memory barrier in read_seqcount_begin of child is enough */
+ /*
+ * This sequence count validates that the inode matches
+ * the dentry name information from lookup.
+ */
+ *inode = dentry->d_inode;
+ if (read_seqcount_retry(&dentry->d_seq, seq))
+ return -ECHILD;
+
+ /*
+ * This sequence count validates that the parent had no
+ * changes while we did the lookup of the dentry above.
+ *
+ * The memory barrier in read_seqcount_begin of child is
+ * enough, we can use __read_seqcount_retry here.
+ */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
nd->seq = seq;
@@ -1429,7 +1432,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
unsigned long hash = 0;
for (;;) {
- a = *(unsigned long *)name;
+ a = load_unaligned_zeropad(name);
if (len < sizeof(unsigned long))
break;
hash += a;
@@ -1459,7 +1462,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
do {
hash = (hash + a) * 9;
len += sizeof(unsigned long);
- a = *(unsigned long *)(name+len);
+ a = load_unaligned_zeropad(name+len);
/* Do we have any NUL or '/' bytes in this word? */
mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
} while (!mask);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9c94297bb70e..7f6a23f0244e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -38,6 +38,8 @@
#include <linux/buffer_head.h> /* various write calls */
#include <linux/prefetch.h>
+#include "../pnfs.h"
+#include "../internal.h"
#include "blocklayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -868,7 +870,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s max_resp_sz %u max_pages %d\n",
__func__, max_resp_sz, max_pages);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index da7b5e4ff9ec..60f7e4ec842c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1729,7 +1729,8 @@ error:
*/
struct nfs_server *nfs_clone_server(struct nfs_server *source,
struct nfs_fh *fh,
- struct nfs_fattr *fattr)
+ struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
{
struct nfs_server *server;
struct nfs_fattr *fattr_fsinfo;
@@ -1758,7 +1759,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
error = nfs_init_server_rpcclient(server,
source->client->cl_timeout,
- source->client->cl_auth->au_flavor);
+ flavor);
if (error < 0)
goto out_free_server;
if (!IS_ERR(source->client_acl))
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4aaf0316d76a..eedd24d0ad2e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -477,10 +477,7 @@ different:
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
- struct qstr filename = {
- .len = entry->len,
- .name = entry->name,
- };
+ struct qstr filename = QSTR_INIT(entry->name, entry->len);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = parent->d_inode;
@@ -1429,7 +1426,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
}
open_flags = nd->intent.open.flags;
- attr.ia_valid = 0;
+ attr.ia_valid = ATTR_OPEN;
ctx = create_nfs_open_context(dentry, open_flags);
res = ERR_CAST(ctx);
@@ -1536,7 +1533,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
if (IS_ERR(ctx))
goto out;
- attr.ia_valid = 0;
+ attr.ia_valid = ATTR_OPEN;
if (openflags & O_TRUNC) {
attr.ia_valid |= ATTR_SIZE;
attr.ia_size = 0;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b7f348bb618b..ba3019f5934c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -554,12 +554,16 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
struct nfs_client *clp;
int error = 0;
+ if (!try_module_get(THIS_MODULE))
+ return 0;
+
while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
error = __rpc_pipefs_event(clp, event, sb);
nfs_put_client(clp);
if (error)
break;
}
+ module_put(THIS_MODULE);
return error;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2476dc69365f..b777bdaba4c5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -165,7 +165,8 @@ extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fh *,
- struct nfs_fattr *);
+ struct nfs_fattr *,
+ rpc_authflavor_t);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern int nfs4_check_client_ready(struct nfs_client *clp);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
@@ -186,10 +187,10 @@ static inline void nfs_fs_proc_exit(void)
/* nfs4namespace.c */
#ifdef CONFIG_NFS_V4
-extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
+extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry);
#else
static inline
-struct vfsmount *nfs_do_refmount(struct dentry *dentry)
+struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
{
return ERR_PTR(-ENOENT);
}
@@ -234,7 +235,6 @@ extern const u32 nfs41_maxwrite_overhead;
/* nfs4proc.c */
#ifdef CONFIG_NFS_V4
extern struct rpc_procinfo nfs4_procedures[];
-void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
#endif
extern int nfs4_init_ds_session(struct nfs_client *clp);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1807866bb3ab..d51868e5683c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -148,66 +148,31 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
return pseudoflavor;
}
-static int nfs_negotiate_security(const struct dentry *parent,
- const struct dentry *dentry,
- rpc_authflavor_t *flavor)
+static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
+ struct qstr *name,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr)
{
- struct page *page;
- struct nfs4_secinfo_flavors *flavors;
- int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
- int ret = -EPERM;
-
- secinfo = NFS_PROTO(parent->d_inode)->secinfo;
- if (secinfo != NULL) {
- page = alloc_page(GFP_KERNEL);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
- flavors = page_address(page);
- ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
- *flavor = nfs_find_best_sec(flavors);
- put_page(page);
- }
-
-out:
- return ret;
-}
-
-static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
- struct dentry *dentry, struct path *path,
- struct nfs_fh *fh, struct nfs_fattr *fattr,
- rpc_authflavor_t *flavor)
-{
- struct rpc_clnt *clone;
- struct rpc_auth *auth;
int err;
- err = nfs_negotiate_security(parent, path->dentry, flavor);
- if (err < 0)
- goto out;
- clone = rpc_clone_client(server->client);
- auth = rpcauth_create(*flavor, clone);
- if (!auth) {
- err = -EIO;
- goto out_shutdown;
- }
- err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
- &path->dentry->d_name,
- fh, fattr);
-out_shutdown:
- rpc_shutdown_client(clone);
-out:
- return err;
+ if (NFS_PROTO(dir)->version == 4)
+ return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
+
+ err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
+ if (err)
+ return ERR_PTR(err);
+ return rpc_clone_client(NFS_SERVER(dir)->client);
}
#else /* CONFIG_NFS_V4 */
-static inline int nfs_lookup_with_sec(struct nfs_server *server,
- struct dentry *parent, struct dentry *dentry,
- struct path *path, struct nfs_fh *fh,
- struct nfs_fattr *fattr,
- rpc_authflavor_t *flavor)
+static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir,
+ struct qstr *name,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr)
{
- return -EPERM;
+ int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr);
+ if (err)
+ return ERR_PTR(err);
+ return rpc_clone_client(NFS_SERVER(dir)->client);
}
#endif /* CONFIG_NFS_V4 */
@@ -226,12 +191,10 @@ static inline int nfs_lookup_with_sec(struct nfs_server *server,
struct vfsmount *nfs_d_automount(struct path *path)
{
struct vfsmount *mnt;
- struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
struct dentry *parent;
struct nfs_fh *fh = NULL;
struct nfs_fattr *fattr = NULL;
- int err;
- rpc_authflavor_t flavor = RPC_AUTH_UNIX;
+ struct rpc_clnt *client;
dprintk("--> nfs_d_automount()\n");
@@ -249,21 +212,19 @@ struct vfsmount *nfs_d_automount(struct path *path)
/* Look it up again to get its attributes */
parent = dget_parent(path->dentry);
- err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
- &path->dentry->d_name,
- fh, fattr);
- if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
- err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
+ client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr);
dput(parent);
- if (err != 0) {
- mnt = ERR_PTR(err);
+ if (IS_ERR(client)) {
+ mnt = ERR_CAST(client);
goto out;
}
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
- mnt = nfs_do_refmount(path->dentry);
+ mnt = nfs_do_refmount(client, path->dentry);
else
- mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
+ mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor);
+ rpc_shutdown_client(client);
+
if (IS_ERR(mnt))
goto out;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae6711a..75c68299358e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -398,8 +398,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
- .name.len = name->len,
- .name.name = name->name,
+ .name = *name,
};
struct nfs_removeres res;
struct rpc_message msg = {
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 97ecc863dd76..8d75021020b3 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -59,6 +59,7 @@ struct nfs_unique_id {
#define NFS_SEQID_CONFIRMED 1
struct nfs_seqid_counter {
+ ktime_t create_time;
int owner_id;
int flags;
u32 counter;
@@ -204,6 +205,9 @@ struct nfs4_state_maintenance_ops {
extern const struct dentry_operations nfs4_dentry_operations;
extern const struct inode_operations nfs4_dir_inode_operations;
+/* nfs4namespace.c */
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *);
+
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
@@ -212,8 +216,11 @@ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
-extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
- struct nfs4_fs_locations *fs_locations, struct page *page);
+extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
+ struct nfs4_fs_locations *, struct page *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *,
+ struct nfs_fh *, struct nfs_fattr *);
+extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
extern int nfs4_release_lockowner(struct nfs4_lock_state *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index a866bbd2890a..c9cff9adb2d3 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -699,7 +699,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla
* GETDEVICEINFO's maxcount
*/
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
__func__, inode, max_resp_sz, max_pages);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 9c8eca315f43..a7f3dedc4ec7 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -52,6 +52,30 @@ Elong:
}
/*
+ * return the path component of "<server>:<path>"
+ * nfspath - the "<server>:<path>" string
+ * end - one past the last char that could contain "<server>:"
+ * returns NULL on failure
+ */
+static char *nfs_path_component(const char *nfspath, const char *end)
+{
+ char *p;
+
+ if (*nfspath == '[') {
+ /* parse [] escaped IPv6 addrs */
+ p = strchr(nfspath, ']');
+ if (p != NULL && ++p < end && *p == ':')
+ return p + 1;
+ } else {
+ /* otherwise split on first colon */
+ p = strchr(nfspath, ':');
+ if (p != NULL && p < end)
+ return p + 1;
+ }
+ return NULL;
+}
+
+/*
* Determine the mount path as a string
*/
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
@@ -59,9 +83,9 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
char *limit;
char *path = nfs_path(&limit, dentry, buffer, buflen);
if (!IS_ERR(path)) {
- char *colon = strchr(path, ':');
- if (colon && colon < limit)
- path = colon + 1;
+ char *path_component = nfs_path_component(path, limit);
+ if (path_component)
+ return path_component;
}
return path;
}
@@ -108,6 +132,58 @@ static size_t nfs_parse_server_name(char *string, size_t len,
return ret;
}
+static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name)
+{
+ struct page *page;
+ struct nfs4_secinfo_flavors *flavors;
+ rpc_authflavor_t flavor;
+ int err;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ flavors = page_address(page);
+
+ err = nfs4_proc_secinfo(inode, name, flavors);
+ if (err < 0) {
+ flavor = err;
+ goto out;
+ }
+
+ flavor = nfs_find_best_sec(flavors);
+
+out:
+ put_page(page);
+ return flavor;
+}
+
+/*
+ * Please call rpc_shutdown_client() when you are done with this client.
+ */
+struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
+ struct qstr *name)
+{
+ struct rpc_clnt *clone;
+ struct rpc_auth *auth;
+ rpc_authflavor_t flavor;
+
+ flavor = nfs4_negotiate_security(inode, name);
+ if (flavor < 0)
+ return ERR_PTR(flavor);
+
+ clone = rpc_clone_client(clnt);
+ if (IS_ERR(clone))
+ return clone;
+
+ auth = rpcauth_create(flavor, clone);
+ if (!auth) {
+ rpc_shutdown_client(clone);
+ clone = ERR_PTR(-EIO);
+ }
+
+ return clone;
+}
+
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
char *page, char *page2,
const struct nfs4_fs_location *location)
@@ -224,7 +300,7 @@ out:
* @dentry - dentry of referral
*
*/
-struct vfsmount *nfs_do_refmount(struct dentry *dentry)
+struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
{
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct dentry *parent;
@@ -250,7 +326,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry)
dprintk("%s: getting locations for %s/%s\n",
__func__, parent->d_name.name, dentry->d_name.name);
- err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page);
+ err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
if (err != 0 ||
fs_locations->nlocations <= 0 ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f82bde005a82..ab985f6f0da8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -838,7 +838,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
p->o_arg.open_flags = flags;
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
p->o_arg.clientid = server->nfs_client->cl_clientid;
- p->o_arg.id = sp->so_seqid.owner_id;
+ p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
+ p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
p->o_arg.name = &dentry->d_name;
p->o_arg.server = server;
p->o_arg.bitmask = server->attr_bitmask;
@@ -1466,8 +1467,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
goto unlock_no_action;
rcu_read_unlock();
}
- /* Update sequence id. */
- data->o_arg.id = sp->so_seqid.owner_id;
+ /* Update client id. */
data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
@@ -1954,10 +1954,19 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
};
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_do_setattr(inode, cred, fattr, sattr, state),
- &exception);
+ err = _nfs4_do_setattr(inode, cred, fattr, sattr, state);
+ switch (err) {
+ case -NFS4ERR_OPENMODE:
+ if (state && !(state->state & FMODE_WRITE)) {
+ err = -EBADF;
+ if (sattr->ia_valid & ATTR_OPEN)
+ err = -EACCES;
+ goto out;
+ }
+ }
+ err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+out:
return err;
}
@@ -2368,8 +2377,9 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
* Note that we'll actually follow the referral later when
* we detect fsid mismatch in inode revalidation
*/
-static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
- struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
+ const struct qstr *name, struct nfs_fattr *fattr,
+ struct nfs_fh *fhandle)
{
int status = -ENOMEM;
struct page *page = NULL;
@@ -2382,7 +2392,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
if (locations == NULL)
goto out;
- status = nfs4_proc_fs_locations(dir, name, locations, page);
+ status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
/* Make sure server returned a different fsid for the referral */
@@ -2519,39 +2529,84 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
return status;
}
-void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
+static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
{
- memset(fh, 0, sizeof(struct nfs_fh));
- fattr->fsid.major = 1;
fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
- NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
+ NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_MOUNTPOINT;
fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
fattr->nlink = 2;
}
-static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
- struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
+ struct qstr *name, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
{
struct nfs4_exception exception = { };
+ struct rpc_clnt *client = *clnt;
int err;
do {
- int status;
-
- status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr);
- switch (status) {
+ err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr);
+ switch (err) {
case -NFS4ERR_BADNAME:
- return -ENOENT;
+ err = -ENOENT;
+ goto out;
case -NFS4ERR_MOVED:
- return nfs4_get_referral(dir, name, fattr, fhandle);
+ err = nfs4_get_referral(client, dir, name, fattr, fhandle);
+ goto out;
case -NFS4ERR_WRONGSEC:
- nfs_fixup_secinfo_attributes(fattr, fhandle);
+ err = -EPERM;
+ if (client != *clnt)
+ goto out;
+
+ client = nfs4_create_sec_client(client, dir, name);
+ if (IS_ERR(client))
+ return PTR_ERR(client);
+
+ exception.retry = 1;
+ break;
+ default:
+ err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception);
}
- err = nfs4_handle_exception(NFS_SERVER(dir),
- status, &exception);
} while (exception.retry);
+
+out:
+ if (err == 0)
+ *clnt = client;
+ else if (client != *clnt)
+ rpc_shutdown_client(client);
+
return err;
}
+static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+ int status;
+ struct rpc_clnt *client = NFS_CLIENT(dir);
+
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
+ if (client != NFS_CLIENT(dir)) {
+ rpc_shutdown_client(client);
+ nfs_fixup_secinfo_attributes(fattr);
+ }
+ return status;
+}
+
+struct rpc_clnt *
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+{
+ int status;
+ struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
+
+ status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr);
+ if (status < 0) {
+ rpc_shutdown_client(client);
+ return ERR_PTR(status);
+ }
+ return client;
+}
+
static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
{
struct nfs_server *server = NFS_SERVER(inode);
@@ -2727,8 +2782,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_removeargs args = {
.fh = NFS_FH(dir),
- .name.len = name->len,
- .name.name = name->name,
+ .name = *name,
.bitmask = server->attr_bitmask,
};
struct nfs_removeres res = {
@@ -3619,16 +3673,16 @@ out:
return ret;
}
-static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len)
+static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
{
struct nfs4_cached_acl *acl;
- if (buf && acl_len <= PAGE_SIZE) {
+ if (pages && acl_len <= PAGE_SIZE) {
acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL);
if (acl == NULL)
goto out;
acl->cached = 1;
- memcpy(acl->data, buf, acl_len);
+ _copy_from_pages(acl->data, pages, pgbase, acl_len);
} else {
acl = kmalloc(sizeof(*acl), GFP_KERNEL);
if (acl == NULL)
@@ -3661,7 +3715,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
struct nfs_getaclres res = {
.acl_len = buflen,
};
- void *resp_buf;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
.rpc_argp = &args,
@@ -3675,24 +3728,27 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
if (npages == 0)
npages = 1;
+ /* Add an extra page to handle the bitmap returned */
+ npages++;
+
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL);
if (!pages[i])
goto out_free;
}
- if (npages > 1) {
- /* for decoding across pages */
- res.acl_scratch = alloc_page(GFP_KERNEL);
- if (!res.acl_scratch)
- goto out_free;
- }
+
+ /* for decoding across pages */
+ res.acl_scratch = alloc_page(GFP_KERNEL);
+ if (!res.acl_scratch)
+ goto out_free;
+
args.acl_len = npages * PAGE_SIZE;
args.acl_pgbase = 0;
+
/* Let decode_getfacl know not to fail if the ACL data is larger than
* the page we send as a guess */
if (buf == NULL)
res.acl_flags |= NFS4_ACL_LEN_REQUEST;
- resp_buf = page_address(pages[0]);
dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
__func__, buf, buflen, npages, args.acl_len);
@@ -3703,9 +3759,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
acl_len = res.acl_len - res.acl_data_offset;
if (acl_len > args.acl_len)
- nfs4_write_cached_acl(inode, NULL, acl_len);
+ nfs4_write_cached_acl(inode, NULL, 0, acl_len);
else
- nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset,
+ nfs4_write_cached_acl(inode, pages, res.acl_data_offset,
acl_len);
if (buf) {
ret = -ERANGE;
@@ -4558,7 +4614,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
do {
@@ -4576,7 +4634,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
err = nfs4_set_lock_state(state, request);
@@ -4676,6 +4736,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *
{
struct nfs4_exception exception = {
.state = state,
+ .inode = state->inode,
};
int err;
@@ -4721,6 +4782,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
+ /*
+ * Don't rely on the VFS having checked the file open mode,
+ * since it won't do this for flock() locks.
+ */
+ switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
do {
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
@@ -4891,8 +4966,10 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
fattr->nlink = 2;
}
-int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
- struct nfs4_fs_locations *fs_locations, struct page *page)
+static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+ const struct qstr *name,
+ struct nfs4_fs_locations *fs_locations,
+ struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
u32 bitmask[2] = {
@@ -4926,11 +5003,26 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
fs_locations->nlocations = 0;
- status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0);
dprintk("%s: returned status = %d\n", __func__, status);
return status;
}
+int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
+ const struct qstr *name,
+ struct nfs4_fs_locations *fs_locations,
+ struct page *page)
+{
+ struct nfs4_exception exception = { };
+ int err;
+ do {
+ err = nfs4_handle_exception(NFS_SERVER(dir),
+ _nfs4_proc_fs_locations(client, dir, name, fs_locations, page),
+ &exception);
+ } while (exception.retry);
+ return err;
+}
+
static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
{
int status;
@@ -4953,8 +5045,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
return status;
}
-static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
- struct nfs4_secinfo_flavors *flavors)
+int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
+ struct nfs4_secinfo_flavors *flavors)
{
struct nfs4_exception exception = { };
int err;
@@ -5029,10 +5121,9 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
nfs4_construct_boot_verifier(clp, &verifier);
args.id_len = scnprintf(args.id, sizeof(args.id),
- "%s/%s.%s/%u",
+ "%s/%s/%u",
clp->cl_ipaddr,
- init_utsname()->nodename,
- init_utsname()->domainname,
+ clp->cl_rpcclient->cl_nodename,
clp->cl_rpcclient->cl_auth->au_flavor);
res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 0f43414eb25a..7f0fcfc1fe9d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -393,6 +393,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
static void
nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
{
+ sc->create_time = ktime_get();
sc->flags = 0;
sc->counter = 0;
spin_lock_init(&sc->lock);
@@ -434,13 +435,17 @@ nfs4_alloc_state_owner(struct nfs_server *server,
static void
nfs4_drop_state_owner(struct nfs4_state_owner *sp)
{
- if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+ struct rb_node *rb_node = &sp->so_server_node;
+
+ if (!RB_EMPTY_NODE(rb_node)) {
struct nfs_server *server = sp->so_server;
struct nfs_client *clp = server->nfs_client;
spin_lock(&clp->cl_lock);
- rb_erase(&sp->so_server_node, &server->state_owners);
- RB_CLEAR_NODE(&sp->so_server_node);
+ if (!RB_EMPTY_NODE(rb_node)) {
+ rb_erase(rb_node, &server->state_owners);
+ RB_CLEAR_NODE(rb_node);
+ }
spin_unlock(&clp->cl_lock);
}
}
@@ -516,6 +521,14 @@ out:
/**
* nfs4_put_state_owner - Release a nfs4_state_owner
* @sp: state owner data to release
+ *
+ * Note that we keep released state owners on an LRU
+ * list.
+ * This caches valid state owners so that they can be
+ * reused, to avoid the OPEN_CONFIRM on minor version 0.
+ * It also pins the uniquifier of dropped state owners for
+ * a while, to ensure that those state owner names are
+ * never reused.
*/
void nfs4_put_state_owner(struct nfs4_state_owner *sp)
{
@@ -525,15 +538,9 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
return;
- if (!RB_EMPTY_NODE(&sp->so_server_node)) {
- sp->so_expires = jiffies;
- list_add_tail(&sp->so_lru, &server->state_owners_lru);
- spin_unlock(&clp->cl_lock);
- } else {
- nfs4_remove_state_owner_locked(sp);
- spin_unlock(&clp->cl_lock);
- nfs4_free_state_owner(sp);
- }
+ sp->so_expires = jiffies;
+ list_add_tail(&sp->so_lru, &server->state_owners_lru);
+ spin_unlock(&clp->cl_lock);
}
/**
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c74fdb114b48..c54aae364bee 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -74,7 +74,7 @@ static int nfs4_stat_to_errno(int);
/* lock,open owner id:
* we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2)
*/
-#define open_owner_id_maxsz (1 + 1 + 4)
+#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2)
#define lock_owner_id_maxsz (1 + 1 + 4)
#define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2))
@@ -1340,12 +1340,13 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
*/
encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->fmode);
- p = reserve_space(xdr, 32);
+ p = reserve_space(xdr, 36);
p = xdr_encode_hyper(p, arg->clientid);
- *p++ = cpu_to_be32(20);
+ *p++ = cpu_to_be32(24);
p = xdr_encode_opaque_fixed(p, "open id:", 8);
*p++ = cpu_to_be32(arg->server->s_dev);
- xdr_encode_hyper(p, arg->id);
+ *p++ = cpu_to_be32(arg->id.uniquifier);
+ xdr_encode_hyper(p, arg->id.create_time);
}
static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -4257,8 +4258,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
status = decode_attr_error(xdr, bitmap, &err);
if (status < 0)
goto xdr_error;
- if (err == -NFS4ERR_WRONGSEC)
- nfs_fixup_secinfo_attributes(fattr, fh);
status = decode_attr_filehandle(xdr, bitmap, fh);
if (status < 0)
@@ -4901,11 +4900,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
bitmap[3] = {0};
struct kvec *iov = req->rq_rcv_buf.head;
int status;
+ size_t page_len = xdr->buf->page_len;
res->acl_len = 0;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
goto out;
+
bm_p = xdr->p;
+ res->acl_data_offset = be32_to_cpup(bm_p) + 2;
+ res->acl_data_offset <<= 2;
+ /* Check if the acl data starts beyond the allocated buffer */
+ if (res->acl_data_offset > page_len)
+ return -ERANGE;
+
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
goto out;
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
@@ -4915,28 +4922,24 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
size_t hdrlen;
- u32 recvd;
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
* variable length bitmaps.*/
xdr->p = bm_p;
- res->acl_data_offset = be32_to_cpup(bm_p) + 2;
- res->acl_data_offset <<= 2;
/* We ignore &savep and don't do consistency checks on
* the attr length. Let userspace figure it out.... */
hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
attrlen += res->acl_data_offset;
- recvd = req->rq_rcv_buf.len - hdrlen;
- if (attrlen > recvd) {
+ if (attrlen > page_len) {
if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
/* getxattr interface called with a NULL buf */
res->acl_len = attrlen;
goto out;
}
- dprintk("NFS: acl reply: attrlen %u > recvd %u\n",
- attrlen, recvd);
+ dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
+ attrlen, page_len);
return -EINVAL;
}
xdr_read_pages(xdr, attrlen);
@@ -5089,16 +5092,13 @@ out_err:
return -EINVAL;
}
-static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
{
struct nfs4_secinfo_flavor *sec_flavor;
int status;
__be32 *p;
int i, num_flavors;
- status = decode_op_hdr(xdr, OP_SECINFO);
- if (status)
- goto out;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
@@ -5124,6 +5124,7 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
res->flavors->num_flavors++;
}
+ status = 0;
out:
return status;
out_overflow:
@@ -5131,7 +5132,23 @@ out_overflow:
return -EIO;
}
+static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+{
+ int status = decode_op_hdr(xdr, OP_SECINFO);
+ if (status)
+ return status;
+ return decode_secinfo_common(xdr, res);
+}
+
#if defined(CONFIG_NFS_V4_1)
+static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
+{
+ int status = decode_op_hdr(xdr, OP_SECINFO_NO_NAME);
+ if (status)
+ return status;
+ return decode_secinfo_common(xdr, res);
+}
+
static int decode_exchange_id(struct xdr_stream *xdr,
struct nfs41_exchange_id_res *res)
{
@@ -6816,7 +6833,7 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
status = decode_putrootfh(xdr);
if (status)
goto out;
- status = decode_secinfo(xdr, res);
+ status = decode_secinfo_no_name(xdr, res);
out:
return status;
}
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 8d45f1c318ce..595c5fc21a19 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -604,7 +604,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
{
struct objlayout_deviceinfo *odi;
struct pnfs_device pd;
- struct super_block *sb;
struct page *page, **pages;
u32 *p;
int err;
@@ -623,7 +622,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
pd.pglen = PAGE_SIZE;
pd.mincount = 0;
- sb = pnfslay->plh_inode->i_sb;
err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
if (err)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b5d451586943..38512bcd2e98 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -587,7 +587,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
/* allocate pages for xdr post processing */
max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = max_resp_sz >> PAGE_SHIFT;
+ max_pages = nfs_page_array_len(0, max_resp_sz);
pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (!pages)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b63b6f4d14fb..d6408b6437de 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -335,8 +335,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
- .name.len = name->len,
- .name.name = name->name,
+ .name = *name,
};
struct rpc_message msg = {
.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9a0e8ef4a409..0a4be28c2ea3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -322,7 +322,7 @@ out_bad:
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_read_data, list);
list_del(&data->list);
- nfs_readdata_free(data);
+ nfs_readdata_release(data);
}
nfs_readpage_release(req);
return -ENOMEM;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 37412f706b32..4ac7fca7e4bf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2428,7 +2428,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
dprintk("--> nfs_xdev_mount()\n");
/* create a new volume representation */
- server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out_err_noserver;
@@ -2767,11 +2767,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
char *root_devname;
size_t len;
- len = strlen(hostname) + 3;
+ len = strlen(hostname) + 5;
root_devname = kmalloc(len, GFP_KERNEL);
if (root_devname == NULL)
return ERR_PTR(-ENOMEM);
- snprintf(root_devname, len, "%s:/", hostname);
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ snprintf(root_devname, len, "[%s]:/", hostname);
+ else
+ snprintf(root_devname, len, "%s:/", hostname);
root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
kfree(root_devname);
return root_mnt;
@@ -2951,7 +2955,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
dprintk("--> nfs4_xdev_mount()\n");
/* create a new volume representation */
- server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+ server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out_err_noserver;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2c68818f68ac..c07462320f6b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -682,7 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = rqend - req->wb_offset;
out_unlock:
spin_unlock(&inode->i_lock);
- nfs_clear_request_commit(req);
+ if (req)
+ nfs_clear_request_commit(req);
return req;
out_flushme:
spin_unlock(&inode->i_lock);
@@ -1018,7 +1019,7 @@ out_bad:
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_write_data, list);
list_del(&data->list);
- nfs_writedata_free(data);
+ nfs_writedata_release(data);
}
nfs_redirty_request(req);
return -ENOMEM;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 4767429264a2..ed3f9206a0ee 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -577,7 +577,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
struct cld_net *cn = nn->cld_net;
if (mlen != sizeof(*cmsg)) {
- dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen,
+ dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
sizeof(*cmsg));
return -EINVAL;
}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index fce2bbee66d4..0bb2c2010b95 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -441,7 +441,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child)
{
unsigned long ino;
struct inode *inode;
- struct qstr dotdot = {.name = "..", .len = 2};
+ struct qstr dotdot = QSTR_INIT("..", 2);
struct nilfs_root *root;
ino = nilfs_inode_by_name(child->d_inode, &dotdot);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 044e7b58d31c..1bfe8802cc1e 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -2005,7 +2005,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
o2net_listen_sock = sock;
INIT_WORK(&o2net_listen_work, o2net_accept_many);
- sock->sk->sk_reuse = 1;
+ sock->sk->sk_reuse = SK_CAN_REUSE;
ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
if (ret < 0) {
printk(KERN_ERR "o2net: Error %d while binding socket at "
diff --git a/fs/open.c b/fs/open.c
index 5720854156db..5eccdcea2d1b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -681,7 +681,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_op = fops_get(inode->i_fop);
- error = security_dentry_open(f, cred);
+ error = security_file_open(f, cred);
if (error)
goto cleanup_all;
diff --git a/fs/pipe.c b/fs/pipe.c
index 25feaa3faac0..fec5e4ad071a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -346,6 +346,16 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static const struct pipe_buf_operations packet_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = anon_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t pos)
@@ -407,6 +417,13 @@ redo:
ret += chars;
buf->offset += chars;
buf->len -= chars;
+
+ /* Was it a packet buffer? Clean up and exit */
+ if (buf->flags & PIPE_BUF_FLAG_PACKET) {
+ total_len = chars;
+ buf->len = 0;
+ }
+
if (!buf->len) {
buf->ops = NULL;
ops->release(pipe, buf);
@@ -459,6 +476,11 @@ redo:
return ret;
}
+static inline int is_packetized(struct file *file)
+{
+ return (file->f_flags & O_DIRECT) != 0;
+}
+
static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t ppos)
@@ -593,6 +615,11 @@ redo2:
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
buf->len = chars;
+ buf->flags = 0;
+ if (is_packetized(filp)) {
+ buf->ops = &packet_pipe_buf_ops;
+ buf->flags = PIPE_BUF_FLAG_PACKET;
+ }
pipe->nrbufs = ++bufs;
pipe->tmp_page = NULL;
@@ -1013,7 +1040,7 @@ struct file *create_write_pipe(int flags)
goto err_dentry;
f->f_mapping = inode->i_mapping;
- f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
+ f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->f_version = 0;
return f;
@@ -1057,7 +1084,7 @@ int do_pipe_flags(int *fd, int flags)
int error;
int fdw, fdr;
- if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+ if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
return -EINVAL;
fw = create_write_pipe(flags);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1c8b280146d7..57b8159f26f3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1799,10 +1799,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
if (task) {
files = get_files_struct(task);
if (files) {
+ struct file *file;
rcu_read_lock();
- if (fcheck_files(files, fd)) {
+ file = fcheck_files(files, fd);
+ if (file) {
+ unsigned i_mode, f_mode = file->f_mode;
+
rcu_read_unlock();
put_files_struct(files);
+
if (task_dumpable(task)) {
rcu_read_lock();
cred = __task_cred(task);
@@ -1813,7 +1818,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
inode->i_uid = 0;
inode->i_gid = 0;
}
- inode->i_mode &= ~(S_ISUID | S_ISGID);
+
+ i_mode = S_IFLNK;
+ if (f_mode & FMODE_READ)
+ i_mode |= S_IRUSR | S_IXUSR;
+ if (f_mode & FMODE_WRITE)
+ i_mode |= S_IWUSR | S_IXUSR;
+ inode->i_mode = i_mode;
+
security_task_to_inode(task, inode);
put_task_struct(task);
return 1;
@@ -1837,8 +1849,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
unsigned fd = *(const unsigned *)ptr;
- struct file *file;
- struct files_struct *files;
struct inode *inode;
struct proc_inode *ei;
struct dentry *error = ERR_PTR(-ENOENT);
@@ -1848,25 +1858,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
goto out;
ei = PROC_I(inode);
ei->fd = fd;
- files = get_files_struct(task);
- if (!files)
- goto out_iput;
- inode->i_mode = S_IFLNK;
-
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (!file)
- goto out_unlock;
- if (file->f_mode & FMODE_READ)
- inode->i_mode |= S_IRUSR | S_IXUSR;
- if (file->f_mode & FMODE_WRITE)
- inode->i_mode |= S_IWUSR | S_IXUSR;
- spin_unlock(&files->file_lock);
- put_files_struct(files);
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
@@ -1879,12 +1870,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
out:
return error;
-out_unlock:
- spin_unlock(&files->file_lock);
- put_files_struct(files);
-out_iput:
- iput(inode);
- goto out;
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -2177,16 +2162,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
goto out;
result = ERR_PTR(-EACCES);
- if (lock_trace(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
result = ERR_PTR(-ENOENT);
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
- goto out_unlock;
+ goto out_put_task;
mm = get_task_mm(task);
if (!mm)
- goto out_unlock;
+ goto out_put_task;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
@@ -2198,8 +2183,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
out_no_vma:
up_read(&mm->mmap_sem);
mmput(mm);
-out_unlock:
- unlock_trace(task);
out_put_task:
put_task_struct(task);
out:
@@ -2233,7 +2216,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto out;
ret = -EACCES;
- if (lock_trace(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
ret = 0;
@@ -2241,12 +2224,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
case 0:
ino = inode->i_ino;
if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
- goto out_unlock;
+ goto out_put_task;
filp->f_pos++;
case 1:
ino = parent_ino(dentry);
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out_unlock;
+ goto out_put_task;
filp->f_pos++;
default:
{
@@ -2257,7 +2240,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
mm = get_task_mm(task);
if (!mm)
- goto out_unlock;
+ goto out_put_task;
down_read(&mm->mmap_sem);
nr_files = 0;
@@ -2287,7 +2270,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
flex_array_free(fa);
up_read(&mm->mmap_sem);
mmput(mm);
- goto out_unlock;
+ goto out_put_task;
}
for (i = 0, vma = mm->mmap, pos = 2; vma;
vma = vma->vm_next) {
@@ -2332,8 +2315,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
}
-out_unlock:
- unlock_trace(task);
out_put_task:
put_task_struct(task);
out:
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2b9a7607cbd5..1030a716d155 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -597,9 +597,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!page)
continue;
- if (PageReserved(page))
- continue;
-
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
ClearPageReferenced(page);
@@ -750,6 +747,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte)
else if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte))
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ else
+ *pme = make_pme(PM_NOT_PRESENT);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -764,6 +763,8 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
if (pmd_present(pmd))
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ else
+ *pme = make_pme(PM_NOT_PRESENT);
}
#else
static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
@@ -804,8 +805,10 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
/* check to see if we've left 'vma' behind
* and need a new, higher one */
- if (vma && (addr >= vma->vm_end))
+ if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
+ pme = make_pme(PM_NOT_PRESENT);
+ }
/* check that 'vma' actually covers this address,
* and that it isn't a huge page vma */
@@ -833,6 +836,8 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ else
+ *pme = make_pme(PM_NOT_PRESENT);
}
/* This function walks within one hugetlb entry in the single call */
@@ -842,7 +847,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct pagemapread *pm = walk->private;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme;
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 8007ae7c0d8c..23ade2680a4a 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -11,3 +11,20 @@ config PSTORE
(e.g. ACPI_APEI on X86) which will select this for you.
If you don't have a platform persistent store driver,
say N.
+
+config PSTORE_RAM
+ tristate "Log panic/oops to a RAM buffer"
+ depends on PSTORE
+ depends on HAS_IOMEM
+ depends on HAVE_MEMBLOCK
+ select REED_SOLOMON
+ select REED_SOLOMON_ENC8
+ select REED_SOLOMON_DEC8
+ help
+ This enables panic and oops messages to be logged to a circular
+ buffer in RAM where it can be read back at some later point.
+
+ Note that for historical reasons, the module will be named
+ "ramoops.ko".
+
+ For more information, see Documentation/ramoops.txt.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 760f4bce7d1d..278a44e0d4e1 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -5,3 +5,6 @@
obj-y += pstore.o
pstore-objs += inode.o platform.o
+
+ramoops-objs += ram.o ram_core.o
+obj-$(CONFIG_PSTORE_RAM) += ramoops.o
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
new file mode 100644
index 000000000000..9123cce28c1e
--- /dev/null
+++ b/fs/pstore/ram.c
@@ -0,0 +1,383 @@
+/*
+ * RAM Oops/Panic logger
+ *
+ * Copyright (C) 2010 Marco Stornelli <marco.stornelli@gmail.com>
+ * Copyright (C) 2011 Kees Cook <keescook@chromium.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/pstore.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/pstore_ram.h>
+
+#define RAMOOPS_KERNMSG_HDR "===="
+#define MIN_MEM_SIZE 4096UL
+
+static ulong record_size = MIN_MEM_SIZE;
+module_param(record_size, ulong, 0400);
+MODULE_PARM_DESC(record_size,
+ "size of each dump done on oops/panic");
+
+static ulong mem_address;
+module_param(mem_address, ulong, 0400);
+MODULE_PARM_DESC(mem_address,
+ "start of reserved RAM used to store oops/panic logs");
+
+static ulong mem_size;
+module_param(mem_size, ulong, 0400);
+MODULE_PARM_DESC(mem_size,
+ "size of reserved RAM used to store oops/panic logs");
+
+static int dump_oops = 1;
+module_param(dump_oops, int, 0600);
+MODULE_PARM_DESC(dump_oops,
+ "set to 1 to dump oopses, 0 to only dump panics (default 1)");
+
+static int ramoops_ecc;
+module_param_named(ecc, ramoops_ecc, int, 0600);
+MODULE_PARM_DESC(ramoops_ecc,
+ "set to 1 to enable ECC support");
+
+struct ramoops_context {
+ struct persistent_ram_zone **przs;
+ phys_addr_t phys_addr;
+ unsigned long size;
+ size_t record_size;
+ int dump_oops;
+ bool ecc;
+ unsigned int count;
+ unsigned int max_count;
+ unsigned int read_count;
+ struct pstore_info pstore;
+};
+
+static struct platform_device *dummy;
+static struct ramoops_platform_data *dummy_data;
+
+static int ramoops_pstore_open(struct pstore_info *psi)
+{
+ struct ramoops_context *cxt = psi->data;
+
+ cxt->read_count = 0;
+ return 0;
+}
+
+static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
+ struct timespec *time,
+ char **buf,
+ struct pstore_info *psi)
+{
+ ssize_t size;
+ struct ramoops_context *cxt = psi->data;
+ struct persistent_ram_zone *prz;
+
+ if (cxt->read_count >= cxt->max_count)
+ return -EINVAL;
+
+ *id = cxt->read_count++;
+ prz = cxt->przs[*id];
+
+ /* Only supports dmesg output so far. */
+ *type = PSTORE_TYPE_DMESG;
+ /* TODO(kees): Bogus time for the moment. */
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+
+ size = persistent_ram_old_size(prz);
+ *buf = kmalloc(size, GFP_KERNEL);
+ if (*buf == NULL)
+ return -ENOMEM;
+ memcpy(*buf, persistent_ram_old(prz), size);
+
+ return size;
+}
+
+static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz)
+{
+ char *hdr;
+ struct timeval timestamp;
+ size_t len;
+
+ do_gettimeofday(&timestamp);
+ hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
+ (long)timestamp.tv_sec, (long)timestamp.tv_usec);
+ WARN_ON_ONCE(!hdr);
+ len = hdr ? strlen(hdr) : 0;
+ persistent_ram_write(prz, hdr, len);
+ kfree(hdr);
+
+ return len;
+}
+
+static int ramoops_pstore_write(enum pstore_type_id type,
+ enum kmsg_dump_reason reason,
+ u64 *id,
+ unsigned int part,
+ size_t size, struct pstore_info *psi)
+{
+ struct ramoops_context *cxt = psi->data;
+ struct persistent_ram_zone *prz = cxt->przs[cxt->count];
+ size_t hlen;
+
+ /* Currently ramoops is designed to only store dmesg dumps. */
+ if (type != PSTORE_TYPE_DMESG)
+ return -EINVAL;
+
+ /* Out of the various dmesg dump types, ramoops is currently designed
+ * to only store crash logs, rather than storing general kernel logs.
+ */
+ if (reason != KMSG_DUMP_OOPS &&
+ reason != KMSG_DUMP_PANIC)
+ return -EINVAL;
+
+ /* Skip Oopes when configured to do so. */
+ if (reason == KMSG_DUMP_OOPS && !cxt->dump_oops)
+ return -EINVAL;
+
+ /* Explicitly only take the first part of any new crash.
+ * If our buffer is larger than kmsg_bytes, this can never happen,
+ * and if our buffer is smaller than kmsg_bytes, we don't want the
+ * report split across multiple records.
+ */
+ if (part != 1)
+ return -ENOSPC;
+
+ hlen = ramoops_write_kmsg_hdr(prz);
+ if (size + hlen > prz->buffer_size)
+ size = prz->buffer_size - hlen;
+ persistent_ram_write(prz, cxt->pstore.buf, size);
+
+ cxt->count = (cxt->count + 1) % cxt->max_count;
+
+ return 0;
+}
+
+static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
+ struct pstore_info *psi)
+{
+ struct ramoops_context *cxt = psi->data;
+
+ if (id >= cxt->max_count)
+ return -EINVAL;
+
+ persistent_ram_free_old(cxt->przs[id]);
+
+ return 0;
+}
+
+static struct ramoops_context oops_cxt = {
+ .pstore = {
+ .owner = THIS_MODULE,
+ .name = "ramoops",
+ .open = ramoops_pstore_open,
+ .read = ramoops_pstore_read,
+ .write = ramoops_pstore_write,
+ .erase = ramoops_pstore_erase,
+ },
+};
+
+static int __init ramoops_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct ramoops_platform_data *pdata = pdev->dev.platform_data;
+ struct ramoops_context *cxt = &oops_cxt;
+ int err = -EINVAL;
+ int i;
+
+ /* Only a single ramoops area allowed at a time, so fail extra
+ * probes.
+ */
+ if (cxt->max_count)
+ goto fail_out;
+
+ if (!pdata->mem_size || !pdata->record_size) {
+ pr_err("The memory size and the record size must be "
+ "non-zero\n");
+ goto fail_out;
+ }
+
+ pdata->mem_size = rounddown_pow_of_two(pdata->mem_size);
+ pdata->record_size = rounddown_pow_of_two(pdata->record_size);
+
+ /* Check for the minimum memory size */
+ if (pdata->mem_size < MIN_MEM_SIZE &&
+ pdata->record_size < MIN_MEM_SIZE) {
+ pr_err("memory size too small, minimum is %lu\n",
+ MIN_MEM_SIZE);
+ goto fail_out;
+ }
+
+ if (pdata->mem_size < pdata->record_size) {
+ pr_err("The memory size must be larger than the "
+ "records size\n");
+ goto fail_out;
+ }
+
+ cxt->max_count = pdata->mem_size / pdata->record_size;
+ cxt->count = 0;
+ cxt->size = pdata->mem_size;
+ cxt->phys_addr = pdata->mem_address;
+ cxt->record_size = pdata->record_size;
+ cxt->dump_oops = pdata->dump_oops;
+ cxt->ecc = pdata->ecc;
+
+ cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_count, GFP_KERNEL);
+ if (!cxt->przs) {
+ err = -ENOMEM;
+ dev_err(dev, "failed to initialize a prz array\n");
+ goto fail_out;
+ }
+
+ for (i = 0; i < cxt->max_count; i++) {
+ size_t sz = cxt->record_size;
+ phys_addr_t start = cxt->phys_addr + sz * i;
+
+ cxt->przs[i] = persistent_ram_new(start, sz, cxt->ecc);
+ if (IS_ERR(cxt->przs[i])) {
+ err = PTR_ERR(cxt->przs[i]);
+ dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
+ sz, (unsigned long long)start, err);
+ goto fail_przs;
+ }
+ }
+
+ cxt->pstore.data = cxt;
+ cxt->pstore.bufsize = cxt->przs[0]->buffer_size;
+ cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
+ spin_lock_init(&cxt->pstore.buf_lock);
+ if (!cxt->pstore.buf) {
+ pr_err("cannot allocate pstore buffer\n");
+ goto fail_clear;
+ }
+
+ err = pstore_register(&cxt->pstore);
+ if (err) {
+ pr_err("registering with pstore failed\n");
+ goto fail_buf;
+ }
+
+ /*
+ * Update the module parameter variables as well so they are visible
+ * through /sys/module/ramoops/parameters/
+ */
+ mem_size = pdata->mem_size;
+ mem_address = pdata->mem_address;
+ record_size = pdata->record_size;
+ dump_oops = pdata->dump_oops;
+
+ pr_info("attached 0x%lx@0x%llx (%ux0x%zx), ecc: %s\n",
+ cxt->size, (unsigned long long)cxt->phys_addr,
+ cxt->max_count, cxt->record_size,
+ ramoops_ecc ? "on" : "off");
+
+ return 0;
+
+fail_buf:
+ kfree(cxt->pstore.buf);
+fail_clear:
+ cxt->pstore.bufsize = 0;
+ cxt->max_count = 0;
+fail_przs:
+ for (i = 0; cxt->przs[i]; i++)
+ persistent_ram_free(cxt->przs[i]);
+ kfree(cxt->przs);
+fail_out:
+ return err;
+}
+
+static int __exit ramoops_remove(struct platform_device *pdev)
+{
+#if 0
+ /* TODO(kees): We cannot unload ramoops since pstore doesn't support
+ * unregistering yet.
+ */
+ struct ramoops_context *cxt = &oops_cxt;
+
+ iounmap(cxt->virt_addr);
+ release_mem_region(cxt->phys_addr, cxt->size);
+ cxt->max_count = 0;
+
+ /* TODO(kees): When pstore supports unregistering, call it here. */
+ kfree(cxt->pstore.buf);
+ cxt->pstore.bufsize = 0;
+
+ return 0;
+#endif
+ return -EBUSY;
+}
+
+static struct platform_driver ramoops_driver = {
+ .remove = __exit_p(ramoops_remove),
+ .driver = {
+ .name = "ramoops",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init ramoops_init(void)
+{
+ int ret;
+ ret = platform_driver_probe(&ramoops_driver, ramoops_probe);
+ if (ret == -ENODEV) {
+ /*
+ * If we didn't find a platform device, we use module parameters
+ * building platform data on the fly.
+ */
+ pr_info("platform device not found, using module parameters\n");
+ dummy_data = kzalloc(sizeof(struct ramoops_platform_data),
+ GFP_KERNEL);
+ if (!dummy_data)
+ return -ENOMEM;
+ dummy_data->mem_size = mem_size;
+ dummy_data->mem_address = mem_address;
+ dummy_data->record_size = record_size;
+ dummy_data->dump_oops = dump_oops;
+ dummy_data->ecc = ramoops_ecc;
+ dummy = platform_create_bundle(&ramoops_driver, ramoops_probe,
+ NULL, 0, dummy_data,
+ sizeof(struct ramoops_platform_data));
+
+ if (IS_ERR(dummy))
+ ret = PTR_ERR(dummy);
+ else
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static void __exit ramoops_exit(void)
+{
+ platform_driver_unregister(&ramoops_driver);
+ kfree(dummy_data);
+}
+
+module_init(ramoops_init);
+module_exit(ramoops_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Marco Stornelli <marco.stornelli@gmail.com>");
+MODULE_DESCRIPTION("RAM Oops/Panic logger/driver");
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
new file mode 100644
index 000000000000..31f8d184f3a0
--- /dev/null
+++ b/fs/pstore/ram_core.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (C) 2012 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/memblock.h>
+#include <linux/rslib.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pstore_ram.h>
+#include <asm/page.h>
+
+struct persistent_ram_buffer {
+ uint32_t sig;
+ atomic_t start;
+ atomic_t size;
+ uint8_t data[0];
+};
+
+#define PERSISTENT_RAM_SIG (0x43474244) /* DBGC */
+
+static __initdata LIST_HEAD(persistent_ram_list);
+
+static inline size_t buffer_size(struct persistent_ram_zone *prz)
+{
+ return atomic_read(&prz->buffer->size);
+}
+
+static inline size_t buffer_start(struct persistent_ram_zone *prz)
+{
+ return atomic_read(&prz->buffer->start);
+}
+
+/* increase and wrap the start pointer, returning the old value */
+static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
+{
+ int old;
+ int new;
+
+ do {
+ old = atomic_read(&prz->buffer->start);
+ new = old + a;
+ while (unlikely(new > prz->buffer_size))
+ new -= prz->buffer_size;
+ } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old);
+
+ return old;
+}
+
+/* increase the size counter until it hits the max size */
+static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
+{
+ size_t old;
+ size_t new;
+
+ if (atomic_read(&prz->buffer->size) == prz->buffer_size)
+ return;
+
+ do {
+ old = atomic_read(&prz->buffer->size);
+ new = old + a;
+ if (new > prz->buffer_size)
+ new = prz->buffer_size;
+ } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old);
+}
+
+static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
+ uint8_t *data, size_t len, uint8_t *ecc)
+{
+ int i;
+ uint16_t par[prz->ecc_size];
+
+ /* Initialize the parity buffer */
+ memset(par, 0, sizeof(par));
+ encode_rs8(prz->rs_decoder, data, len, par, 0);
+ for (i = 0; i < prz->ecc_size; i++)
+ ecc[i] = par[i];
+}
+
+static int persistent_ram_decode_rs8(struct persistent_ram_zone *prz,
+ void *data, size_t len, uint8_t *ecc)
+{
+ int i;
+ uint16_t par[prz->ecc_size];
+
+ for (i = 0; i < prz->ecc_size; i++)
+ par[i] = ecc[i];
+ return decode_rs8(prz->rs_decoder, data, par, len,
+ NULL, 0, NULL, 0, NULL);
+}
+
+static void notrace persistent_ram_update_ecc(struct persistent_ram_zone *prz,
+ unsigned int start, unsigned int count)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ uint8_t *buffer_end = buffer->data + prz->buffer_size;
+ uint8_t *block;
+ uint8_t *par;
+ int ecc_block_size = prz->ecc_block_size;
+ int ecc_size = prz->ecc_size;
+ int size = prz->ecc_block_size;
+
+ if (!prz->ecc)
+ return;
+
+ block = buffer->data + (start & ~(ecc_block_size - 1));
+ par = prz->par_buffer + (start / ecc_block_size) * prz->ecc_size;
+
+ do {
+ if (block + ecc_block_size > buffer_end)
+ size = buffer_end - block;
+ persistent_ram_encode_rs8(prz, block, size, par);
+ block += ecc_block_size;
+ par += ecc_size;
+ } while (block < buffer->data + start + count);
+}
+
+static void persistent_ram_update_header_ecc(struct persistent_ram_zone *prz)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+
+ if (!prz->ecc)
+ return;
+
+ persistent_ram_encode_rs8(prz, (uint8_t *)buffer, sizeof(*buffer),
+ prz->par_header);
+}
+
+static void persistent_ram_ecc_old(struct persistent_ram_zone *prz)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ uint8_t *block;
+ uint8_t *par;
+
+ if (!prz->ecc)
+ return;
+
+ block = buffer->data;
+ par = prz->par_buffer;
+ while (block < buffer->data + buffer_size(prz)) {
+ int numerr;
+ int size = prz->ecc_block_size;
+ if (block + size > buffer->data + prz->buffer_size)
+ size = buffer->data + prz->buffer_size - block;
+ numerr = persistent_ram_decode_rs8(prz, block, size, par);
+ if (numerr > 0) {
+ pr_devel("persistent_ram: error in block %p, %d\n",
+ block, numerr);
+ prz->corrected_bytes += numerr;
+ } else if (numerr < 0) {
+ pr_devel("persistent_ram: uncorrectable error in block %p\n",
+ block);
+ prz->bad_blocks++;
+ }
+ block += prz->ecc_block_size;
+ par += prz->ecc_size;
+ }
+}
+
+static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
+ size_t buffer_size)
+{
+ int numerr;
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ int ecc_blocks;
+
+ if (!prz->ecc)
+ return 0;
+
+ prz->ecc_block_size = 128;
+ prz->ecc_size = 16;
+ prz->ecc_symsize = 8;
+ prz->ecc_poly = 0x11d;
+
+ ecc_blocks = DIV_ROUND_UP(prz->buffer_size, prz->ecc_block_size);
+ prz->buffer_size -= (ecc_blocks + 1) * prz->ecc_size;
+
+ if (prz->buffer_size > buffer_size) {
+ pr_err("persistent_ram: invalid size %zu, non-ecc datasize %zu\n",
+ buffer_size, prz->buffer_size);
+ return -EINVAL;
+ }
+
+ prz->par_buffer = buffer->data + prz->buffer_size;
+ prz->par_header = prz->par_buffer + ecc_blocks * prz->ecc_size;
+
+ /*
+ * first consecutive root is 0
+ * primitive element to generate roots = 1
+ */
+ prz->rs_decoder = init_rs(prz->ecc_symsize, prz->ecc_poly, 0, 1,
+ prz->ecc_size);
+ if (prz->rs_decoder == NULL) {
+ pr_info("persistent_ram: init_rs failed\n");
+ return -EINVAL;
+ }
+
+ prz->corrected_bytes = 0;
+ prz->bad_blocks = 0;
+
+ numerr = persistent_ram_decode_rs8(prz, buffer, sizeof(*buffer),
+ prz->par_header);
+ if (numerr > 0) {
+ pr_info("persistent_ram: error in header, %d\n", numerr);
+ prz->corrected_bytes += numerr;
+ } else if (numerr < 0) {
+ pr_info("persistent_ram: uncorrectable error in header\n");
+ prz->bad_blocks++;
+ }
+
+ return 0;
+}
+
+ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
+ char *str, size_t len)
+{
+ ssize_t ret;
+
+ if (prz->corrected_bytes || prz->bad_blocks)
+ ret = snprintf(str, len, ""
+ "\n%d Corrected bytes, %d unrecoverable blocks\n",
+ prz->corrected_bytes, prz->bad_blocks);
+ else
+ ret = snprintf(str, len, "\nNo errors detected\n");
+
+ return ret;
+}
+
+static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
+ const void *s, unsigned int start, unsigned int count)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ memcpy(buffer->data + start, s, count);
+ persistent_ram_update_ecc(prz, start, count);
+}
+
+static void __init
+persistent_ram_save_old(struct persistent_ram_zone *prz)
+{
+ struct persistent_ram_buffer *buffer = prz->buffer;
+ size_t size = buffer_size(prz);
+ size_t start = buffer_start(prz);
+ char *dest;
+
+ persistent_ram_ecc_old(prz);
+
+ dest = kmalloc(size, GFP_KERNEL);
+ if (dest == NULL) {
+ pr_err("persistent_ram: failed to allocate buffer\n");
+ return;
+ }
+
+ prz->old_log = dest;
+ prz->old_log_size = size;
+ memcpy(prz->old_log, &buffer->data[start], size - start);
+ memcpy(prz->old_log + size - start, &buffer->data[0], start);
+}
+
+int notrace persistent_ram_write(struct persistent_ram_zone *prz,
+ const void *s, unsigned int count)
+{
+ int rem;
+ int c = count;
+ size_t start;
+
+ if (unlikely(c > prz->buffer_size)) {
+ s += c - prz->buffer_size;
+ c = prz->buffer_size;
+ }
+
+ buffer_size_add(prz, c);
+
+ start = buffer_start_add(prz, c);
+
+ rem = prz->buffer_size - start;
+ if (unlikely(rem < c)) {
+ persistent_ram_update(prz, s, start, rem);
+ s += rem;
+ c -= rem;
+ start = 0;
+ }
+ persistent_ram_update(prz, s, start, c);
+
+ persistent_ram_update_header_ecc(prz);
+
+ return count;
+}
+
+size_t persistent_ram_old_size(struct persistent_ram_zone *prz)
+{
+ return prz->old_log_size;
+}
+
+void *persistent_ram_old(struct persistent_ram_zone *prz)
+{
+ return prz->old_log;
+}
+
+void persistent_ram_free_old(struct persistent_ram_zone *prz)
+{
+ kfree(prz->old_log);
+ prz->old_log = NULL;
+ prz->old_log_size = 0;
+}
+
+static void *persistent_ram_vmap(phys_addr_t start, size_t size)
+{
+ struct page **pages;
+ phys_addr_t page_start;
+ unsigned int page_count;
+ pgprot_t prot;
+ unsigned int i;
+ void *vaddr;
+
+ page_start = start - offset_in_page(start);
+ page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE);
+
+ prot = pgprot_noncached(PAGE_KERNEL);
+
+ pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
+ if (!pages) {
+ pr_err("%s: Failed to allocate array for %u pages\n", __func__,
+ page_count);
+ return NULL;
+ }
+
+ for (i = 0; i < page_count; i++) {
+ phys_addr_t addr = page_start + i * PAGE_SIZE;
+ pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+ }
+ vaddr = vmap(pages, page_count, VM_MAP, prot);
+ kfree(pages);
+
+ return vaddr;
+}
+
+static void *persistent_ram_iomap(phys_addr_t start, size_t size)
+{
+ if (!request_mem_region(start, size, "persistent_ram")) {
+ pr_err("request mem region (0x%llx@0x%llx) failed\n",
+ (unsigned long long)size, (unsigned long long)start);
+ return NULL;
+ }
+
+ return ioremap(start, size);
+}
+
+static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
+ struct persistent_ram_zone *prz)
+{
+ prz->paddr = start;
+ prz->size = size;
+
+ if (pfn_valid(start >> PAGE_SHIFT))
+ prz->vaddr = persistent_ram_vmap(start, size);
+ else
+ prz->vaddr = persistent_ram_iomap(start, size);
+
+ if (!prz->vaddr) {
+ pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__,
+ (unsigned long long)size, (unsigned long long)start);
+ return -ENOMEM;
+ }
+
+ prz->buffer = prz->vaddr + offset_in_page(start);
+ prz->buffer_size = size - sizeof(struct persistent_ram_buffer);
+
+ return 0;
+}
+
+static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool ecc)
+{
+ int ret;
+
+ prz->ecc = ecc;
+
+ ret = persistent_ram_init_ecc(prz, prz->buffer_size);
+ if (ret)
+ return ret;
+
+ if (prz->buffer->sig == PERSISTENT_RAM_SIG) {
+ if (buffer_size(prz) > prz->buffer_size ||
+ buffer_start(prz) > buffer_size(prz))
+ pr_info("persistent_ram: found existing invalid buffer,"
+ " size %zu, start %zu\n",
+ buffer_size(prz), buffer_start(prz));
+ else {
+ pr_info("persistent_ram: found existing buffer,"
+ " size %zu, start %zu\n",
+ buffer_size(prz), buffer_start(prz));
+ persistent_ram_save_old(prz);
+ }
+ } else {
+ pr_info("persistent_ram: no valid data in buffer"
+ " (sig = 0x%08x)\n", prz->buffer->sig);
+ }
+
+ prz->buffer->sig = PERSISTENT_RAM_SIG;
+ atomic_set(&prz->buffer->start, 0);
+ atomic_set(&prz->buffer->size, 0);
+
+ return 0;
+}
+
+void persistent_ram_free(struct persistent_ram_zone *prz)
+{
+ if (pfn_valid(prz->paddr >> PAGE_SHIFT)) {
+ vunmap(prz->vaddr);
+ } else {
+ iounmap(prz->vaddr);
+ release_mem_region(prz->paddr, prz->size);
+ }
+ persistent_ram_free_old(prz);
+ kfree(prz);
+}
+
+struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
+ size_t size,
+ bool ecc)
+{
+ struct persistent_ram_zone *prz;
+ int ret = -ENOMEM;
+
+ prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL);
+ if (!prz) {
+ pr_err("persistent_ram: failed to allocate persistent ram zone\n");
+ goto err;
+ }
+
+ ret = persistent_ram_buffer_map(start, size, prz);
+ if (ret)
+ goto err;
+
+ persistent_ram_post_init(prz, ecc);
+ persistent_ram_update_header_ecc(prz);
+
+ return prz;
+err:
+ kfree(prz);
+ return ERR_PTR(ret);
+}
+
+#ifndef MODULE
+static int __init persistent_ram_buffer_init(const char *name,
+ struct persistent_ram_zone *prz)
+{
+ int i;
+ struct persistent_ram *ram;
+ struct persistent_ram_descriptor *desc;
+ phys_addr_t start;
+
+ list_for_each_entry(ram, &persistent_ram_list, node) {
+ start = ram->start;
+ for (i = 0; i < ram->num_descs; i++) {
+ desc = &ram->descs[i];
+ if (!strcmp(desc->name, name))
+ return persistent_ram_buffer_map(start,
+ desc->size, prz);
+ start += desc->size;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static __init
+struct persistent_ram_zone *__persistent_ram_init(struct device *dev, bool ecc)
+{
+ struct persistent_ram_zone *prz;
+ int ret = -ENOMEM;
+
+ prz = kzalloc(sizeof(struct persistent_ram_zone), GFP_KERNEL);
+ if (!prz) {
+ pr_err("persistent_ram: failed to allocate persistent ram zone\n");
+ goto err;
+ }
+
+ ret = persistent_ram_buffer_init(dev_name(dev), prz);
+ if (ret) {
+ pr_err("persistent_ram: failed to initialize buffer\n");
+ goto err;
+ }
+
+ persistent_ram_post_init(prz, ecc);
+
+ return prz;
+err:
+ kfree(prz);
+ return ERR_PTR(ret);
+}
+
+struct persistent_ram_zone * __init
+persistent_ram_init_ringbuffer(struct device *dev, bool ecc)
+{
+ return __persistent_ram_init(dev, ecc);
+}
+
+int __init persistent_ram_early_init(struct persistent_ram *ram)
+{
+ int ret;
+
+ ret = memblock_reserve(ram->start, ram->size);
+ if (ret) {
+ pr_err("Failed to reserve persistent memory from %08lx-%08lx\n",
+ (long)ram->start, (long)(ram->start + ram->size - 1));
+ return ret;
+ }
+
+ list_add_tail(&ram->node, &persistent_ram_list);
+
+ pr_info("Initialized persistent memory from %08lx-%08lx\n",
+ (long)ram->start, (long)(ram->start + ram->size - 1));
+
+ return 0;
+}
+#endif
diff --git a/fs/stat.c b/fs/stat.c
index c733dc5753ae..0cef3366a919 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,12 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- struct file *f = fget(fd);
+ int fput_needed;
+ struct file *f = fget_light(fd, &fput_needed);
int error = -EBADF;
if (f) {
error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
- fput(f);
+ fput_light(f, fput_needed);
}
return error;
}
@@ -190,24 +191,32 @@ SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, stat
#endif /* __ARCH_WANT_OLD_STAT */
+#if BITS_PER_LONG == 32
+# define choose_32_64(a,b) a
+#else
+# define choose_32_64(a,b) b
+#endif
+
+#define valid_dev(x) choose_32_64(old_valid_dev,new_valid_dev)(x)
+#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)
+
+#ifndef INIT_STRUCT_STAT_PADDING
+# define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
+#endif
+
static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
{
struct stat tmp;
-#if BITS_PER_LONG == 32
- if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+ if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
return -EOVERFLOW;
-#else
- if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev))
+#if BITS_PER_LONG == 32
+ if (stat->size > MAX_NON_LFS)
return -EOVERFLOW;
#endif
- memset(&tmp, 0, sizeof(tmp));
-#if BITS_PER_LONG == 32
- tmp.st_dev = old_encode_dev(stat->dev);
-#else
- tmp.st_dev = new_encode_dev(stat->dev);
-#endif
+ INIT_STRUCT_STAT_PADDING(tmp);
+ tmp.st_dev = encode_dev(stat->dev);
tmp.st_ino = stat->ino;
if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
return -EOVERFLOW;
@@ -217,15 +226,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
return -EOVERFLOW;
SET_UID(tmp.st_uid, stat->uid);
SET_GID(tmp.st_gid, stat->gid);
-#if BITS_PER_LONG == 32
- tmp.st_rdev = old_encode_dev(stat->rdev);
-#else
- tmp.st_rdev = new_encode_dev(stat->rdev);
-#endif
-#if BITS_PER_LONG == 32
- if (stat->size > MAX_NON_LFS)
- return -EOVERFLOW;
-#endif
+ tmp.st_rdev = encode_dev(stat->rdev);
tmp.st_size = stat->size;
tmp.st_atime = stat->atime.tv_sec;
tmp.st_mtime = stat->mtime.tv_sec;
@@ -327,11 +328,15 @@ SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
/* ---------- LFS-64 ----------- */
#ifdef __ARCH_WANT_STAT64
+#ifndef INIT_STRUCT_STAT64_PADDING
+# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st))
+#endif
+
static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
{
struct stat64 tmp;
- memset(&tmp, 0, sizeof(struct stat64));
+ INIT_STRUCT_STAT64_PADDING(tmp);
#ifdef CONFIG_MIPS
/* mips has weird padding, so we don't get 64 bits there */
if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev))
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 35a36d39fa2c..e6bb9b2a4cbe 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -132,6 +132,24 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+/* Test for attributes that want to ignore lockdep for read-locking */
+static bool ignore_lockdep(struct sysfs_dirent *sd)
+{
+ return sysfs_type(sd) == SYSFS_KOBJ_ATTR &&
+ sd->s_attr.attr->ignore_lockdep;
+}
+
+#else
+
+static inline bool ignore_lockdep(struct sysfs_dirent *sd)
+{
+ return true;
+}
+
+#endif
+
/**
* sysfs_get_active - get an active reference to sysfs_dirent
* @sd: sysfs_dirent to get an active reference to
@@ -155,15 +173,17 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
return NULL;
t = atomic_cmpxchg(&sd->s_active, v, v + 1);
- if (likely(t == v)) {
- rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
- return sd;
- }
+ if (likely(t == v))
+ break;
if (t < 0)
return NULL;
cpu_relax();
}
+
+ if (likely(!ignore_lockdep(sd)))
+ rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
+ return sd;
}
/**
@@ -180,7 +200,8 @@ void sysfs_put_active(struct sysfs_dirent *sd)
if (unlikely(!sd))
return;
- rwsem_release(&sd->dep_map, 1, _RET_IP_);
+ if (likely(!ignore_lockdep(sd)))
+ rwsem_release(&sd->dep_map, 1, _RET_IP_);
v = atomic_dec_return(&sd->s_active);
if (likely(v != SD_DEACTIVATED_BIAS))
return;
@@ -858,7 +879,6 @@ int sysfs_rename(struct sysfs_dirent *sd,
struct sysfs_dirent *new_parent_sd, const void *new_ns,
const char *new_name)
{
- const char *dup_name = NULL;
int error;
mutex_lock(&sysfs_mutex);
@@ -875,11 +895,11 @@ int sysfs_rename(struct sysfs_dirent *sd,
/* rename sysfs_dirent */
if (strcmp(sd->s_name, new_name) != 0) {
error = -ENOMEM;
- new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+ new_name = kstrdup(new_name, GFP_KERNEL);
if (!new_name)
goto out;
- dup_name = sd->s_name;
+ kfree(sd->s_name);
sd->s_name = new_name;
}
@@ -895,7 +915,6 @@ int sysfs_rename(struct sysfs_dirent *sd,
error = 0;
out:
mutex_unlock(&sysfs_mutex);
- kfree(dup_name);
return error;
}
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 16ad84d8402f..abd51331345e 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2361,7 +2361,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
* by passing 'ubifs_tnc_remove_nm()' the same key but
* an unmatchable name.
*/
- struct qstr noname = { .len = 0, .name = "" };
+ struct qstr noname = { .name = "" };
err = dbg_check_tnc(c, 0);
mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 85b272268754..7a8bafa19c9f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -298,7 +298,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err, type;
@@ -361,7 +361,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_inode *ui;
struct ubifs_dent_node *xent;
union ubifs_key key;
@@ -524,7 +524,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
{
struct inode *inode, *host = dentry->d_inode;
struct ubifs_info *c = host->i_sb->s_fs_info;
- struct qstr nm = { .name = name, .len = strlen(name) };
+ struct qstr nm = QSTR_INIT(name, strlen(name));
struct ubifs_dent_node *xent;
union ubifs_key key;
int err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 38de8f234b94..a165c66e3eef 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1193,7 +1193,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
{
struct kernel_lb_addr tloc;
struct inode *inode = NULL;
- struct qstr dotdot = {.name = "..", .len = 2};
+ struct qstr dotdot = QSTR_INIT("..", 2);
struct fileIdentDesc cfi;
struct udf_fileident_bh fibh;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ac8e279eccc6..302f340d0071 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -146,10 +146,7 @@ static struct dentry *ufs_fh_to_parent(struct super_block *sb, struct fid *fid,
static struct dentry *ufs_get_parent(struct dentry *child)
{
- struct qstr dot_dot = {
- .name = "..",
- .len = 2,
- };
+ struct qstr dot_dot = QSTR_INIT("..", 2);
ino_t ino;
ino = ufs_inode_by_name(child->d_inode, &dot_dot);