aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dax.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dax.c')
-rw-r--r--fs/dax.c171
1 files changed, 103 insertions, 68 deletions
diff --git a/fs/dax.c b/fs/dax.c
index c45598b912e1..de622d4282a6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -27,6 +27,7 @@
#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/vmstat.h>
#include <linux/pfn_t.h>
@@ -35,6 +36,9 @@
#include <linux/iomap.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/fs_dax.h>
+
/* We choose 4096 entries - same as per-zone page wait tables */
#define DAX_WAIT_TABLE_BITS 12
#define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS)
@@ -922,12 +926,11 @@ static int dax_insert_mapping(struct address_space *mapping,
/**
* dax_pfn_mkwrite - handle first write to DAX page
- * @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
*/
-int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+int dax_pfn_mkwrite(struct vm_fault *vmf)
{
- struct file *file = vma->vm_file;
+ struct file *file = vmf->vma->vm_file;
struct address_space *mapping = file->f_mapping;
void *entry, **slot;
pgoff_t index = vmf->pgoff;
@@ -1079,15 +1082,19 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
*/
ssize_t
dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
- struct iomap_ops *ops)
+ const struct iomap_ops *ops)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
loff_t pos = iocb->ki_pos, ret = 0, done = 0;
unsigned flags = 0;
- if (iov_iter_rw(iter) == WRITE)
+ if (iov_iter_rw(iter) == WRITE) {
+ lockdep_assert_held_exclusive(&inode->i_rwsem);
flags |= IOMAP_WRITE;
+ } else {
+ lockdep_assert_held(&inode->i_rwsem);
+ }
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
@@ -1112,20 +1119,10 @@ static int dax_fault_return(int error)
return VM_FAULT_SIGBUS;
}
-/**
- * dax_iomap_fault - handle a page fault on a DAX file
- * @vma: The virtual memory area where the fault occurred
- * @vmf: The description of the fault
- * @ops: iomap ops passed from the file system
- *
- * When a page fault occurs, filesystems may call this helper in their fault
- * or mkwrite handler for DAX files. Assumes the caller has done all the
- * necessary locking for the page fault to proceed successfully.
- */
-int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops)
+static int dax_iomap_pte_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct inode *inode = mapping->host;
unsigned long vaddr = vmf->address;
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
@@ -1198,11 +1195,11 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
case IOMAP_MAPPED:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
- mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+ mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_insert_mapping(mapping, iomap.bdev, sector,
- PAGE_SIZE, &entry, vma, vmf);
+ PAGE_SIZE, &entry, vmf->vma, vmf);
/* -EBUSY is fine, somebody else faulted on the same PTE */
if (error == -EBUSY)
error = 0;
@@ -1240,7 +1237,6 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
}
return vmf_ret;
}
-EXPORT_SYMBOL_GPL(dax_iomap_fault);
#ifdef CONFIG_FS_DAX_PMD
/*
@@ -1249,21 +1245,21 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
*/
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
-static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, loff_t pos, bool write, void **entryp)
+static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
+ loff_t pos, void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct block_device *bdev = iomap->bdev;
+ struct inode *inode = mapping->host;
struct blk_dax_ctl dax = {
.sector = dax_iomap_sector(iomap, pos),
.size = PMD_SIZE,
};
long length = dax_map_atomic(bdev, &dax);
- void *ret;
+ void *ret = NULL;
if (length < 0) /* dax_map_atomic() failed */
- return VM_FAULT_FALLBACK;
+ goto fallback;
if (length < PMD_SIZE)
goto unmap_fallback;
if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
@@ -1276,67 +1272,87 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
RADIX_DAX_PMD);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+ trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+ dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
unmap_fallback:
dax_unmap_atomic(bdev, &dax);
+fallback:
+ trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
+ dax.pfn, ret);
return VM_FAULT_FALLBACK;
}
-static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, void **entryp)
+static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+ void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ struct inode *inode = mapping->host;
struct page *zero_page;
+ void *ret = NULL; /* traced as NULL if we bail out before the insert */
spinlock_t *ptl;
pmd_t pmd_entry;
- void *ret;
- zero_page = mm_get_huge_zero_page(vma->vm_mm);
+ zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
if (unlikely(!zero_page))
- return VM_FAULT_FALLBACK;
+ goto fallback;
ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
RADIX_DAX_PMD | RADIX_DAX_HZP);
if (IS_ERR(ret))
- return VM_FAULT_FALLBACK;
+ goto fallback;
*entryp = ret;
- ptl = pmd_lock(vma->vm_mm, pmd);
- if (!pmd_none(*pmd)) {
+ ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+ if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
- return VM_FAULT_FALLBACK;
+ goto fallback;
}
- pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+ pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
- set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+ set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
return VM_FAULT_NOPAGE;
+
+fallback:
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
+ return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
{
+ struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
- bool write = flags & FAULT_FLAG_WRITE;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
int result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 };
pgoff_t max_pgoff, pgoff;
- struct vm_fault vmf;
void *entry;
loff_t pos;
int error;
+ /*
+ * Offsets for the beyond-end-of-file check below. Caller is supposed
+ * to hold locks serializing us with truncate / punch hole so that
+ * check is a reliable test.
+ */
+ pgoff = linear_page_index(vma, pmd_addr);
+ max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+
+ trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
goto fallback;
@@ -1347,16 +1363,10 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- /*
- * Check whether offset isn't beyond end of file now. Caller is
- * supposed to hold locks serializing us with truncate / punch hole so
- * this is a reliable test.
- */
- pgoff = linear_page_index(vma, pmd_addr);
- max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
-
- if (pgoff > max_pgoff)
- return VM_FAULT_SIGBUS;
+ if (pgoff > max_pgoff) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
/* If the PMD would extend beyond the file size */
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
@@ -1385,21 +1395,15 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (IS_ERR(entry))
goto finish_iomap;
- vmf.pgoff = pgoff;
- vmf.flags = flags;
- vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
-
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
- &iomap, pos, write, &entry);
+ result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
goto unlock_entry;
- result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
- &entry);
+ result = dax_pmd_load_hole(vmf, &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1425,10 +1429,41 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
fallback:
if (result == VM_FAULT_FALLBACK) {
- split_huge_pmd(vma, pmd, address);
+ split_huge_pmd(vma, vmf->pmd, vmf->address);
count_vm_event(THP_FAULT_FALLBACK);
}
+out:
+ trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
return result;
}
-EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
+#else
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
+{
+ return VM_FAULT_FALLBACK; /* CONFIG_FS_DAX_PMD is off: no PMD path */
+}
#endif /* CONFIG_FS_DAX_PMD */
+
+/**
+ * dax_iomap_fault - handle a page fault on a DAX file
+ * @vmf: The description of the fault
+ * @pe_size: Size of the page table entry to fault (PTE and PMD are handled)
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * handler for DAX files. dax_iomap_fault() assumes the caller has done all
+ * the necessary locking for the page fault to proceed successfully.
+ */
+int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+ const struct iomap_ops *ops)
+{
+ switch (pe_size) {
+ case PE_SIZE_PTE:
+ return dax_iomap_pte_fault(vmf, ops);
+ case PE_SIZE_PMD:
+ return dax_iomap_pmd_fault(vmf, ops);
+ default:
+ return VM_FAULT_FALLBACK; /* entry size not handled here */
+ }
+}
+EXPORT_SYMBOL_GPL(dax_iomap_fault);