From 745f234be12b6191b15eae8dd415cc81a9137f47 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 4 Sep 2015 15:46:14 -0700 Subject: userfaultfd: add vm_userfaultfd_ctx to the vm_area_struct This adds the vm_userfaultfd_ctx to the vm_area_struct. Signed-off-by: Andrea Arcangeli Acked-by: Pavel Emelyanov Cc: Sanidhya Kashyap Cc: zhang.zhanghailiang@huawei.com Cc: "Kirill A. Shutemov" Cc: Andres Lagar-Cavilla Cc: Dave Hansen Cc: Paolo Bonzini Cc: Rik van Riel Cc: Mel Gorman Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Peter Feiner Cc: "Dr. David Alan Gilbert" Cc: Johannes Weiner Cc: "Huangpeng (Peter)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/mm_types.h') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 15549578d559..26a30c3566f0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -256,6 +256,16 @@ struct vm_region { * this region */ }; +#ifdef CONFIG_USERFAULTFD +#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) { NULL, }) +struct vm_userfaultfd_ctx { + struct userfaultfd_ctx *ctx; +}; +#else /* CONFIG_USERFAULTFD */ +#define NULL_VM_UFFD_CTX ((struct vm_userfaultfd_ctx) {}) +struct vm_userfaultfd_ctx {}; +#endif /* CONFIG_USERFAULTFD */ + /* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -322,6 +332,7 @@ struct vm_area_struct { #ifdef CONFIG_NUMA struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; struct core_thread { -- cgit v1.3-6-gb490 From 5b74283ab251b9db55cbbe31d19ca72482103290 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 4 Sep 2015 15:47:29 -0700 Subject: x86, mm: trace when an IPI is about to be sent When unmapping pages it is necessary to flush the TLB. If that page was accessed by another CPU then an IPI is used to flush the remote CPU. That is a lot of IPIs if kswapd is scanning and unmapping >100K pages per second. There already is a window between when a page is unmapped and when it is TLB flushed. This series increases the window so multiple pages can be flushed using a single IPI. This should be safe or the kernel is hosed already. Patch 1 simply made the rest of the series easier to write as ftrace could identify all the senders of TLB flush IPIS. Patch 2 tracks what CPUs potentially map a PFN and then sends an IPI to flush the entire TLB. Patch 3 tracks when there potentially are writable TLB entries that need to be batched differently Patch 4 increases SWAP_CLUSTER_MAX to further batch flushes The performance impact is documented in the changelogs but in the optimistic case on a 4-socket machine the full series reduces interrupts from 900K interrupts/second to 60K interrupts/second. This patch (of 4): It is easy to trace when an IPI is received to flush a TLB but harder to detect what event sent it. This patch makes it easy to identify the source of IPIs being transmitted for TLB flushes on x86. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Reviewed-by: Dave Hansen Acked-by: Ingo Molnar Cc: Linus Torvalds Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/tlb.c | 1 + include/linux/mm_types.h | 1 + include/trace/events/tlb.h | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/mm_types.h') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 90b924acd982..8ddb5d0d66fb 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -140,6 +140,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_end = end; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); + trace_tlb_flush(TLB_REMOTE_SEND_IPI, end - start); if (is_uv_system()) { unsigned int cpu; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 26a30c3566f0..c8d0a73d64c4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -554,6 +554,7 @@ enum tlb_flush_reason { TLB_REMOTE_SHOOTDOWN, TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, + TLB_REMOTE_SEND_IPI, NR_TLB_FLUSH_REASONS, }; diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h index 4250f364a6ca..bc8815f45f3b 100644 --- a/include/trace/events/tlb.h +++ b/include/trace/events/tlb.h @@ -11,7 +11,8 @@ EM( TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" ) \ EM( TLB_REMOTE_SHOOTDOWN, "remote shootdown" ) \ EM( TLB_LOCAL_SHOOTDOWN, "local shootdown" ) \ - EMe( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) + EM( TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" ) \ + EMe( TLB_REMOTE_SEND_IPI, "remote ipi send" ) /* * First define the enums in TLB_FLUSH_REASON to be exported to userspace -- cgit v1.3-6-gb490 From 64b990d2957cb535fe1c17b9694d5d4f7de69962 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 8 Sep 2015 15:02:15 -0700 Subject: mm: drop __nocast from vm_flags_t definition __nocast does no good for vm_flags_t. It only produces useless sparse warnings. Let's drop it. Signed-off-by: Kirill A. Shutemov Cc: Oleg Nesterov Acked-by: David Rientjes Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/mm_types.h') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c8d0a73d64c4..3d6baa7d4534 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -235,7 +235,7 @@ struct page_frag_cache { bool pfmemalloc; }; -typedef unsigned long __nocast vm_flags_t; +typedef unsigned long vm_flags_t; /* * A region containing a mapping of a non-memory backed file under NOMMU -- cgit v1.3-6-gb490