aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/dax.c2
-rw-r--r--fs/ext2/Kconfig1
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/userfaultfd.c37
7 files changed, 60 insertions, 7 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index c2a377cdda2b..83eab52fb3f6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -38,6 +38,7 @@ config FS_DAX
bool "Direct Access (DAX) support"
depends on MMU
depends on !(ARM || MIPS || SPARC)
+ select FS_IOMAP
help
Direct Access (DAX) can be used on memory-backed block devices.
If the block device supports DAX and the filesystem supports DAX,
diff --git a/fs/dax.c b/fs/dax.c
index ddcddfeaa03b..3af2da5e64ce 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -990,7 +990,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
-#ifdef CONFIG_FS_IOMAP
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
{
return iomap->blkno + (((pos & PAGE_MASK) - iomap->offset) >> 9);
@@ -1428,4 +1427,3 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
#endif /* CONFIG_FS_DAX_PMD */
-#endif /* CONFIG_FS_IOMAP */
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index 36bea5adcaba..c634874e12d9 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,6 +1,5 @@
config EXT2_FS
tristate "Second extended fs support"
- select FS_IOMAP if FS_DAX
help
Ext2 is a standard Linux file system for hard disks.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 7b90691e98c4..e38039fd96ff 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,7 +37,6 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
- select FS_IOMAP if FS_DAX
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8e7e61b28f31..87c9a9aacda3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3179,6 +3179,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[PROC_NUMBUF];
int len;
+
+ cond_resched();
if (!has_pid_permissions(ns, iter.task, 2))
continue;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index d0f8a38dfafa..0186fe6d39f3 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/major.h>
#include "internal.h"
static struct kmem_cache *romfs_inode_cachep;
@@ -416,7 +417,22 @@ static void romfs_destroy_inode(struct inode *inode)
static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ u64 id = 0;
+
+ /* When calling huge_encode_dev(),
+ * use sb->s_bdev->bd_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK defined
+ * use sb->s_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD defined
+ * leave id as 0 when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD undefined
+ */
+ if (sb->s_bdev)
+ id = huge_encode_dev(sb->s_bdev->bd_dev);
+ else if (sb->s_dev)
+ id = huge_encode_dev(sb->s_dev);
buf->f_type = ROMFS_MAGIC;
buf->f_namelen = ROMFS_MAXFN;
@@ -489,6 +505,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY | MS_NOATIME;
sb->s_op = &romfs_super_ops;
+#ifdef CONFIG_ROMFS_ON_MTD
+ /* Use same dev ID from the underlying mtdblock device */
+ if (sb->s_mtd)
+ sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
/* read the image superblock and check it */
rsb = kmalloc(512, GFP_KERNEL);
if (!rsb)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d96e2f30084b..43953e03c356 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -63,6 +63,7 @@ struct userfaultfd_wait_queue {
struct uffd_msg msg;
wait_queue_t wq;
struct userfaultfd_ctx *ctx;
+ bool waken;
};
struct userfaultfd_wake_range {
@@ -86,6 +87,12 @@ static int userfaultfd_wake_function(wait_queue_t *wq, unsigned mode,
if (len && (start > uwq->msg.arg.pagefault.address ||
start + len <= uwq->msg.arg.pagefault.address))
goto out;
+ WRITE_ONCE(uwq->waken, true);
+ /*
+ * The implicit smp_mb__before_spinlock in try_to_wake_up()
+ * renders uwq->waken visible to other CPUs before the task is
+ * waken.
+ */
ret = wake_up_state(wq->private, mode);
if (ret)
/*
@@ -264,6 +271,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
struct userfaultfd_wait_queue uwq;
int ret;
bool must_wait, return_to_userland;
+ long blocking_state;
BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
@@ -334,10 +342,13 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
uwq.wq.private = current;
uwq.msg = userfault_msg(vmf->address, vmf->flags, reason);
uwq.ctx = ctx;
+ uwq.waken = false;
return_to_userland =
(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
+ blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
+ TASK_KILLABLE;
spin_lock(&ctx->fault_pending_wqh.lock);
/*
@@ -350,8 +361,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
* following the spin_unlock to happen before the list_add in
* __add_wait_queue.
*/
- set_current_state(return_to_userland ? TASK_INTERRUPTIBLE :
- TASK_KILLABLE);
+ set_current_state(blocking_state);
spin_unlock(&ctx->fault_pending_wqh.lock);
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
@@ -364,6 +374,29 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
wake_up_poll(&ctx->fd_wqh, POLLIN);
schedule();
ret |= VM_FAULT_MAJOR;
+
+ /*
+ * False wakeups can orginate even from rwsem before
+ * up_read() however userfaults will wait either for a
+ * targeted wakeup on the specific uwq waitqueue from
+ * wake_userfault() or for signals or for uffd
+ * release.
+ */
+ while (!READ_ONCE(uwq.waken)) {
+ /*
+ * This needs the full smp_store_mb()
+ * guarantee as the state write must be
+ * visible to other CPUs before reading
+ * uwq.waken from other CPUs.
+ */
+ set_current_state(blocking_state);
+ if (READ_ONCE(uwq.waken) ||
+ READ_ONCE(ctx->released) ||
+ (return_to_userland ? signal_pending(current) :
+ fatal_signal_pending(current)))
+ break;
+ schedule();
+ }
}
__set_current_state(TASK_RUNNING);