aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/hmm.h
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2018-04-10 16:29:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-11 10:28:31 -0700
commitf88a1e90c665408732ab16ea48e1a182dce597a2 (patch)
tree4ed3ef70808a331a8fb6e773cb5d5d12c13c8642 /include/linux/hmm.h
parentmm/hmm: change hmm_vma_fault() to allow write fault on page basis (diff)
downloadlinux-dev-f88a1e90c665408732ab16ea48e1a182dce597a2.tar.xz
linux-dev-f88a1e90c665408732ab16ea48e1a182dce597a2.zip
mm/hmm: use device driver encoding for HMM pfn
Users of hmm_vma_fault() and hmm_vma_get_pfns() provide a flags array and pfn shift value allowing them to define their own encoding for HMM pfn that are fill inside the pfns array of the hmm_range struct. With this device driver can get pfn that match their own private encoding out of HMM without having to do any conversion. [rcampbell@nvidia.com: don't ignore specific pte fault flag in hmm_vma_fault()] Link: http://lkml.kernel.org/r/20180326213009.2460-2-jglisse@redhat.com [rcampbell@nvidia.com: clarify fault logic for device private memory] Link: http://lkml.kernel.org/r/20180326213009.2460-3-jglisse@redhat.com Link: http://lkml.kernel.org/r/20180323005527.758-16-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com> Cc: Evgeny Baskakov <ebaskakov@nvidia.com> Cc: Ralph Campbell <rcampbell@nvidia.com> Cc: Mark Hairgrove <mhairgrove@nvidia.com> Cc: John Hubbard <jhubbard@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/hmm.h')
-rw-r--r--include/linux/hmm.h130
1 files changed, 94 insertions, 36 deletions
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 0f7ea3074175..5d26e0a223d9 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -80,68 +80,145 @@
struct hmm;
/*
+ * hmm_pfn_flag_e - HMM flag enums
+ *
* Flags:
* HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
* HMM_PFN_WRITE: CPU page table has write permission set
+ * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
+ *
+ * The driver provide a flags array, if driver valid bit for an entry is bit
+ * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide
+ * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
+ * Same logic apply to all flags. This is same idea as vm_page_prot in vma
+ * except that this is per device driver rather than per architecture.
+ */
+enum hmm_pfn_flag_e {
+ HMM_PFN_VALID = 0,
+ HMM_PFN_WRITE,
+ HMM_PFN_DEVICE_PRIVATE,
+ HMM_PFN_FLAG_MAX
+};
+
+/*
+ * hmm_pfn_value_e - HMM pfn special value
+ *
+ * Flags:
* HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
+ * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
* HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
* result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
* be mirrored by a device, because the entry will never have HMM_PFN_VALID
* set and the pfn value is undefined.
- * HMM_PFN_DEVICE_PRIVATE: unaddressable device memory (ZONE_DEVICE)
+ *
+ * Driver provide entry value for none entry, error entry and special entry,
+ * driver can alias (ie use same value for error and special for instance). It
+ * should not alias none and error or special.
+ *
+ * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
+ * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
+ * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
+ * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
*/
-#define HMM_PFN_VALID (1 << 0)
-#define HMM_PFN_WRITE (1 << 1)
-#define HMM_PFN_ERROR (1 << 2)
-#define HMM_PFN_SPECIAL (1 << 3)
-#define HMM_PFN_DEVICE_PRIVATE (1 << 4)
-#define HMM_PFN_SHIFT 5
+enum hmm_pfn_value_e {
+ HMM_PFN_ERROR,
+ HMM_PFN_NONE,
+ HMM_PFN_SPECIAL,
+ HMM_PFN_VALUE_MAX
+};
+
+/*
+ * struct hmm_range - track invalidation lock on virtual address range
+ *
+ * @vma: the vm area struct for the range
+ * @list: all range lock are on a list
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of pfns (big enough for the range)
+ * @flags: pfn flags to match device driver page table
+ * @values: pfn value for some special case (none, special, error, ...)
+ * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
+ * @valid: pfns array did not change since it has been fill by an HMM function
+ */
+struct hmm_range {
+ struct vm_area_struct *vma;
+ struct list_head list;
+ unsigned long start;
+ unsigned long end;
+ uint64_t *pfns;
+ const uint64_t *flags;
+ const uint64_t *values;
+ uint8_t pfn_shift;
+ bool valid;
+};
/*
* hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
+ * @range: range use to decode HMM pfn value
* @pfn: HMM pfn value to get corresponding struct page from
* Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
*
* If the HMM pfn is valid (ie valid flag set) then return the struct page
* matching the pfn value stored in the HMM pfn. Otherwise return NULL.
*/
-static inline struct page *hmm_pfn_to_page(uint64_t pfn)
+static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
+ uint64_t pfn)
{
- if (!(pfn & HMM_PFN_VALID))
+ if (pfn == range->values[HMM_PFN_NONE])
+ return NULL;
+ if (pfn == range->values[HMM_PFN_ERROR])
+ return NULL;
+ if (pfn == range->values[HMM_PFN_SPECIAL])
return NULL;
- return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+ if (!(pfn & range->flags[HMM_PFN_VALID]))
+ return NULL;
+ return pfn_to_page(pfn >> range->pfn_shift);
}
/*
* hmm_pfn_to_pfn() - return pfn value store in a HMM pfn
+ * @range: range use to decode HMM pfn value
* @pfn: HMM pfn value to extract pfn from
* Returns: pfn value if HMM pfn is valid, -1UL otherwise
*/
-static inline unsigned long hmm_pfn_to_pfn(uint64_t pfn)
+static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
+ uint64_t pfn)
{
- if (!(pfn & HMM_PFN_VALID))
+ if (pfn == range->values[HMM_PFN_NONE])
+ return -1UL;
+ if (pfn == range->values[HMM_PFN_ERROR])
+ return -1UL;
+ if (pfn == range->values[HMM_PFN_SPECIAL])
return -1UL;
- return (pfn >> HMM_PFN_SHIFT);
+ if (!(pfn & range->flags[HMM_PFN_VALID]))
+ return -1UL;
+ return (pfn >> range->pfn_shift);
}
/*
* hmm_pfn_from_page() - create a valid HMM pfn value from struct page
+ * @range: range use to encode HMM pfn value
* @page: struct page pointer for which to create the HMM pfn
* Returns: valid HMM pfn for the page
*/
-static inline uint64_t hmm_pfn_from_page(struct page *page)
+static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
+ struct page *page)
{
- return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+ return (page_to_pfn(page) << range->pfn_shift) |
+ range->flags[HMM_PFN_VALID];
}
/*
* hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
+ * @range: range use to encode HMM pfn value
* @pfn: pfn value for which to create the HMM pfn
* Returns: valid HMM pfn for the pfn
*/
-static inline uint64_t hmm_pfn_from_pfn(unsigned long pfn)
+static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
+ unsigned long pfn)
{
- return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+ return (pfn << range->pfn_shift) |
+ range->flags[HMM_PFN_VALID];
}
@@ -264,25 +341,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
/*
- * struct hmm_range - track invalidation lock on virtual address range
- *
- * @vma: the vm area struct for the range
- * @list: all range lock are on a list
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @valid: pfns array did not change since it has been fill by an HMM function
- */
-struct hmm_range {
- struct vm_area_struct *vma;
- struct list_head list;
- unsigned long start;
- unsigned long end;
- uint64_t *pfns;
- bool valid;
-};
-
-/*
* To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
* driver lock that serializes device page table updates, then call
* hmm_vma_range_done(), to check if the snapshot is still valid. The same