From 9bd3bb6703d8c0a5fb8aec8e3287bd55b7341dcd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 11 Jul 2019 20:52:08 -0700 Subject: mm/nvdimm: add is_ioremap_addr and use that to check ioremap address Architectures like powerpc use different address range to map ioremap and vmalloc range. The memunmap() check used by the nvdimm layer was wrongly using is_vmalloc_addr() to check for ioremap range which fails for ppc64. This result in ppc64 not freeing the ioremap mapping. The side effect of this is an unbind failure during module unload with papr_scm nvdimm driver Link: http://lkml.kernel.org/r/20190701134038.14165-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions") Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/pgtable.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 3f53be60fb01..64145751b2fd 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -140,6 +140,20 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p) } #endif +#ifdef CONFIG_PPC64 +#define is_ioremap_addr is_ioremap_addr +static inline bool is_ioremap_addr(const void *x) +{ +#ifdef CONFIG_MMU + unsigned long addr = (unsigned long)x; + + return addr >= IOREMAP_BASE && addr < IOREMAP_END; +#else + return false; +#endif +} +#endif /* CONFIG_PPC64 */ + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ -- cgit v1.2.3-59-g8ed1b From cc0e5f1ce0a8017c68983eb6b41a1dbd0d24aa98 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Thu, 11 Jul 2019 20:52:33 -0700 Subject: scripts/spelling.txt: drop "sepc" from the misspelling list The RISC-V architecture has a register named the "Supervisor Exception Program Counter", or "sepc". This abbreviation triggers checkpatch.pl's misspelling detector, resulting in noise in the checkpatch output. The risk that this noise could cause more useful warnings to be missed seems to outweigh the harm of an occasional misspelling of "spec". Thus drop the "sepc" entry from the misspelling list. [akpm@linux-foundation.org: fix existing "sepc" instances, per Joe] Link: http://lkml.kernel.org/r/20190518210037.13674-1-paul.walmsley@sifive.com Signed-off-by: Paul Walmsley Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/unicore32/include/mach/regs-gpio.h | 2 +- drivers/net/wireless/realtek/rtlwifi/wifi.h | 2 +- drivers/scsi/lpfc/lpfc_init.c | 2 +- drivers/staging/rtl8723bs/hal/rtl8723b_phycfg.c | 2 +- scripts/spelling.txt | 1 - 6 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index e8276161872e..381bf8dea193 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) * * Note: If EOI is incorrectly used by SW to lower the CPPR * value (ie more favored), we do not check for rejection of - * a pending interrupt, this is a SW error and PAPR sepcifies + * a pending interrupt, this is a SW error and PAPR specifies * that we don't have to deal with it. * * The sending of an EOI to the ICS is handled after the diff --git a/arch/unicore32/include/mach/regs-gpio.h b/arch/unicore32/include/mach/regs-gpio.h index 806350e1ccb6..5fc701ee33e3 100644 --- a/arch/unicore32/include/mach/regs-gpio.h +++ b/arch/unicore32/include/mach/regs-gpio.h @@ -32,7 +32,7 @@ */ #define GPIO_GEDR (PKUNITY_GPIO_BASE + 0x0018) /* - * Sepcial Voltage Detect Reg GPIO_GPIR. + * Special Voltage Detect Reg GPIO_GPIR. */ #define GPIO_GPIR (PKUNITY_GPIO_BASE + 0x0020) diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 81caa3782ec0..3bdda1c98339 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -598,7 +598,7 @@ enum ht_channel_width { HT_CHANNEL_WIDTH_MAX, }; -/* Ref: 802.11i sepc D10.0 7.3.2.25.1 +/* Ref: 802.11i spec D10.0 7.3.2.25.1 * Cipher Suites Encryption Algorithms */ enum rt_enc_alg { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index eaaef682de25..adfc2ec0f4fc 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2963,7 +2963,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba) del_timer_sync(&phba->fcp_poll_timer); break; case LPFC_PCI_DEV_OC: - /* Stop any OneConnect device sepcific driver timers */ + /* Stop any OneConnect device specific driver timers */ lpfc_sli4_stop_fcf_redisc_wait_timer(phba); break; default: diff --git a/drivers/staging/rtl8723bs/hal/rtl8723b_phycfg.c b/drivers/staging/rtl8723bs/hal/rtl8723b_phycfg.c index 4f2ad54af398..6da7f8e7bdae 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723b_phycfg.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723b_phycfg.c @@ -45,7 +45,7 @@ static u32 phy_CalculateBitShift(u32 BitMask) /** * Function: PHY_QueryBBReg * -* OverView: Read "sepcific bits" from BB register +* OverView: Read "specific bits" from BB register * * Input: * struct adapter * Adapter, diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 86b87332b9e5..5ae83ce31902 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1145,7 +1145,6 @@ senarios||scenarios sentivite||sensitive separatly||separately sepcify||specify -sepc||spec seperated||separated seperately||separately seperate||separate -- cgit v1.2.3-59-g8ed1b From 38ce85f028fe4ea7350f448260845bc3cb030ad1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 11 Jul 2019 20:52:45 -0700 Subject: arch/sh/configs/sdk7786_defconfig: remove CONFIG_LOGFS After commit 1d0fd57a50aa ("logfs: remove from tree"), logfs was removed, drop CONFIG_LOGFS from all defconfigs. Link: http://lkml.kernel.org/r/20190530021032.190639-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/configs/sdk7786_defconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 5209889765ad..49a29338789b 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -191,7 +191,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=m CONFIG_JFFS2_FS_XATTR=y CONFIG_UBIFS_FS=m -CONFIG_LOGFS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m CONFIG_ROMFS_FS=m -- cgit v1.2.3-59-g8ed1b From 410615478667e5ed374755cafea60917ec322b64 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 11 Jul 2019 20:52:48 -0700 Subject: sh: config: remove left-over BACKLIGHT_LCD_SUPPORT CONFIG_BACKLIGHT_LCD_SUPPORT was removed in 8c5dc8d9f19c ("video: backlight: Remove useless BACKLIGHT_LCD_SUPPORT kernel symbol"). Options protected by CONFIG_BACKLIGHT_LCD_SUPPORT are now available directly. Link: http://lkml.kernel.org/r/20190603191925.20659-1-krzk@kernel.org Signed-off-by: Krzysztof Kozlowski Cc: Yoshinori Sato Cc: Rich Felker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/configs/hp6xx_defconfig | 1 - arch/sh/configs/sh2007_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 4dcf7f552582..91d43e2bffea 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -40,7 +40,6 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_HIT=y CONFIG_FB_SH_MOBILE_LCDC=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FONTS=y CONFIG_FONT_PEARL_8x8=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index a1cf6447dbb1..cbd6742eb423 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -85,7 +85,6 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=y CONFIG_SSB=y CONFIG_FB=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -- cgit v1.2.3-59-g8ed1b From 733f0025f0fb43e382b84db0930ae502099b7e62 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 11 Jul 2019 20:52:52 -0700 Subject: sh: prevent warnings when using iounmap When building drm/exynos for sh, as part of an allmodconfig build, the following warning triggered: exynos7_drm_decon.c: In function `decon_remove': exynos7_drm_decon.c:769:24: warning: unused variable `ctx' struct decon_context *ctx = dev_get_drvdata(&pdev->dev); The ctx variable is only used as argument to iounmap(). In sh - allmodconfig CONFIG_MMU is not defined so it ended up in: \#define __iounmap(addr) do { } while (0) \#define iounmap __iounmap Fix the warning by introducing a static inline function for iounmap. This is similar to several other architectures. Link: http://lkml.kernel.org/r/20190622114208.24427-1-sam@ravnborg.org Signed-off-by: Sam Ravnborg Reviewed-by: Geert Uytterhoeven Cc: Yoshinori Sato Cc: Rich Felker Cc: Will Deacon Cc: Mark Brown Cc: Inki Dae Cc: Krzysztof Kozlowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/io.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index c28e37a344ad..ac0561960c52 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -369,7 +369,11 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } #define ioremap_nocache ioremap #define ioremap_uc ioremap -#define iounmap __iounmap + +static inline void iounmap(void __iomem *addr) +{ + __iounmap(addr); +} /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem -- cgit v1.2.3-59-g8ed1b From ff66135015726696568e998720d9b6afe2d04642 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 11 Jul 2019 20:53:56 -0700 Subject: x86: use static_cpu_has in uaccess region to avoid instrumentation This patch is a pre-requisite for enabling KASAN bitops instrumentation; using static_cpu_has instead of boot_cpu_has avoids instrumentation of test_bit inside the uaccess region. With instrumentation, the KASAN check would otherwise be flagged by objtool. For consistency, kernel/signal.c was changed to mirror this change, however, is never instrumented with KASAN (currently unsupported under x86 32bit). Link: http://lkml.kernel.org/r/20190613125950.197667-3-elver@google.com Signed-off-by: Marco Elver Suggested-by: H. Peter Anvin Acked-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Mark Rutland Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/kernel/signal.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 629d1ee05599..1cee10091b9f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); /* Create the ucontext. */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + if (static_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7cf508f78c8c..8eb7193e158d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(&frame->uc, &frame->puc); /* Create the ucontext. */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + if (static_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); -- cgit v1.2.3-59-g8ed1b From 751ad98d5f881df91ba47e013b82422912381e8e Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 11 Jul 2019 20:54:00 -0700 Subject: asm-generic, x86: add bitops instrumentation for KASAN This adds a new header to asm-generic to allow optionally instrumenting architecture-specific asm implementations of bitops. This change includes the required change for x86 as reference and changes the kernel API doc to point to bitops-instrumented.h instead. Rationale: the functions in x86's bitops.h are no longer the kernel API functions, but instead the arch_ prefixed functions, which are then instrumented via bitops-instrumented.h. Other architectures can similarly add support for asm implementations of bitops. The documentation text was derived from x86 and existing bitops asm-generic versions: 1) references to x86 have been removed; 2) as a result, some of the text had to be reworded for clarity and consistency. Tested using lib/test_kasan with bitops tests (pre-requisite patch). Bugzilla ref: https://bugzilla.kernel.org/show_bug.cgi?id=198439 Link: http://lkml.kernel.org/r/20190613125950.197667-4-elver@google.com Signed-off-by: Marco Elver Acked-by: Mark Rutland Reviewed-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/core-api/kernel-api.rst | 2 +- arch/x86/include/asm/bitops.h | 189 +++++---------------- include/asm-generic/bitops-instrumented.h | 263 ++++++++++++++++++++++++++++++ 3 files changed, 302 insertions(+), 152 deletions(-) create mode 100644 include/asm-generic/bitops-instrumented.h (limited to 'arch') diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index 824f24ccf401..08af5caf036d 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -54,7 +54,7 @@ The Linux kernel provides more basic utility functions. Bit Operations -------------- -.. kernel-doc:: arch/x86/include/asm/bitops.h +.. kernel-doc:: include/asm-generic/bitops-instrumented.h :internal: Bitmap Operations diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 8e790ec219a5..ba15d53c1ca7 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -49,23 +49,8 @@ #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ static __always_inline void -set_bit(long nr, volatile unsigned long *addr) +arch_set_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr) } } -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___set_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ static __always_inline void -clear_bit(long nr, volatile unsigned long *addr) +arch_clear_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr) } } -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. - */ -static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); - clear_bit(nr, addr); + arch_clear_bit(nr, addr); } -static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" @@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } +#define arch_clear_bit_unlock_is_negative_byte \ + arch_clear_bit_unlock_is_negative_byte -// Let everybody know we have it -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - */ -static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { - __clear_bit(nr, addr); + arch___clear_bit(nr, addr); } -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___change_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __always_inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch_change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" @@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) } } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. - */ static __always_inline bool -test_and_set_bit_lock(long nr, volatile unsigned long *addr) +arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { - return test_and_set_bit(nr, addr); + return arch_test_and_set_bit(nr, addr); } -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_set_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * return oldbit; } -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - * +/* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. @@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long * * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_clear_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long return oldbit; } -/* WARNING: non atomic and it can be reordered! */ -static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_change_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon return oldbit; } -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } @@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l return oldbit; } -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static bool test_bit(int nr, const volatile unsigned long *addr); -#endif - -#define test_bit(nr, addr) \ +#define arch_test_bit(nr, addr) \ (__builtin_constant_p((nr)) \ ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) @@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x) #include +#include + #include #include diff --git a/include/asm-generic/bitops-instrumented.h b/include/asm-generic/bitops-instrumented.h new file mode 100644 index 000000000000..ddd1c6d9d8db --- /dev/null +++ b/include/asm-generic/bitops-instrumented.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file provides wrappers with sanitizer instrumentation for bit + * operations. + * + * To use this functionality, an arch's bitops.h file needs to define each of + * the below bit operations with an arch_ prefix (e.g. arch_set_bit(), + * arch___set_bit(), etc.). + */ +#ifndef _ASM_GENERIC_BITOPS_INSTRUMENTED_H +#define _ASM_GENERIC_BITOPS_INSTRUMENTED_H + +#include + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This is a relaxed atomic operation (no implied memory barriers). + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void set_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch_set_bit(nr, addr); +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic. If it is called on the same + * region of memory concurrently, the effect may be that only one operation + * succeeds. + */ +static inline void __set_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch___set_bit(nr, addr); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * This is a relaxed atomic operation (no implied memory barriers). + */ +static inline void clear_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch_clear_bit(nr, addr); +} + +/** + * __clear_bit - Clears a bit in memory + * @nr: the bit to clear + * @addr: the address to start counting from + * + * Unlike clear_bit(), this function is non-atomic. If it is called on the same + * region of memory concurrently, the effect may be that only one operation + * succeeds. + */ +static inline void __clear_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch___clear_bit(nr, addr); +} + +/** + * clear_bit_unlock - Clear a bit in memory, for unlock + * @nr: the bit to set + * @addr: the address to start counting from + * + * This operation is atomic and provides release barrier semantics. + */ +static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch_clear_bit_unlock(nr, addr); +} + +/** + * __clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * This is a non-atomic operation but implies a release barrier before the + * memory operation. It can be used for an unlock if no other CPUs can + * concurrently modify other bits in the word. + */ +static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch___clear_bit_unlock(nr, addr); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * This is a relaxed atomic operation (no implied memory barriers). + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch_change_bit(nr, addr); +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic. If it is called on the same + * region of memory concurrently, the effect may be that only one operation + * succeeds. + */ +static inline void __change_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + arch___change_bit(nr, addr); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This is an atomic fully-ordered operation (implied full memory barrier). + */ +static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_set_bit(nr, addr); +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic. If two instances of this operation race, one + * can appear to succeed but actually fail. + */ +static inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch___test_and_set_bit(nr, addr); +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value, for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and provides acquire barrier semantics if + * the returned value is 0. + * It can be used to implement bit locks. + */ +static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_set_bit_lock(nr, addr); +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This is an atomic fully-ordered operation (implied full memory barrier). + */ +static inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_clear_bit(nr, addr); +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic. If two instances of this operation race, one + * can appear to succeed but actually fail. + */ +static inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch___test_and_clear_bit(nr, addr); +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This is an atomic fully-ordered operation (implied full memory barrier). + */ +static inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_change_bit(nr, addr); +} + +/** + * __test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is non-atomic. If two instances of this operation race, one + * can appear to succeed but actually fail. + */ +static inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch___test_and_change_bit(nr, addr); +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline bool test_bit(long nr, const volatile unsigned long *addr) +{ + kasan_check_read(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_bit(nr, addr); +} + +#if defined(arch_clear_bit_unlock_is_negative_byte) +/** + * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom + * byte is negative, for unlock. + * @nr: the bit to clear + * @addr: the address to start counting from + * + * This operation is atomic and provides release barrier semantics. + * + * This is a bit of a one-trick-pony for the filemap code, which clears + * PG_locked and tests PG_waiters, + */ +static inline bool +clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +{ + kasan_check_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_clear_bit_unlock_is_negative_byte(nr, addr); +} +/* Let everybody know we have it. */ +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte +#endif + +#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_H */ -- cgit v1.2.3-59-g8ed1b From 03069bb0b19f8c3b6e079099dd8ac5e974756381 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:54:24 -0700 Subject: arm: remove ARCH_SELECT_MEMORY_MODEL Patch series "remove ARCH_SELECT_MEMORY_MODEL where it has no effect". For several architectures the ARCH_SELECT_MEMORY_MODEL has no real effect because the dependencies for the memory model are always evaluated to a single value. Remove the ARCH_SELECT_MEMORY_MODEL from the Kconfigs for these architectures. This patch (of 3): The ARCH_SELECT_MEMORY_MODEL in arch/arm/Kconfig is enabled only when ARCH_SPARSEMEM_ENABLE=y. But in this case, ARCH_SPARSEMEM_DEFAULT is also enabled and this in turn enables SPARSEMEM_MANUAL. Since there is no definition of ARCH_FLATMEM_ENABLE in arch/arm/Kconfig, SPARSEMEM_MANUAL is the only enabled memory model, hence the final selection will evaluate to SPARSEMEM=y. Since ARCH_SPARSEMEM_ENABLE is set to 'y' only by several sub-arch configurations, the default for must sub-arches would be the falback to FLATMEM regardless of ARCH_SELECT_MEMORY_MODEL. Link: http://lkml.kernel.org/r/1556740577-4140-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d850feb5cc0a..0c55aa199c67 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1622,9 +1622,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool ARCH_SPARSEMEM_ENABLE -config ARCH_SELECT_MEMORY_MODEL - def_bool ARCH_SPARSEMEM_ENABLE - config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM -- cgit v1.2.3-59-g8ed1b From a9d8777e397e3d1630377fa1d0f0dea0329057c3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:54:27 -0700 Subject: s390: remove ARCH_SELECT_MEMORY_MODEL The only reason s390 has ARCH_SELECT_MEMORY_MODEL option in arch/s390/Kconfig is an ancient compile error with allnoconfig which was fixed by commit 97195d6b411f ("[S390] fix sparsemem related compile error with allnoconfig on s390") by adding the ARCH_SELECT_MEMORY_MODEL option. Since then a lot have changed and now allnoconfig builds just fine without ARCH_SELECT_MEMORY_MODEL, so it can be removed. Link: http://lkml.kernel.org/r/1556740577-4140-3-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Heiko Carstens Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index fdb4246265a5..f089ae375f6b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -641,9 +641,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y -config ARCH_SELECT_MEMORY_MODEL - def_bool y - config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y if SPARSEMEM -- cgit v1.2.3-59-g8ed1b From 445676071776de492624a063923736637f9eb3e8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:54:31 -0700 Subject: sparc: remove ARCH_SELECT_MEMORY_MODEL The ARCH_SELECT_MEMORY_MODEL option is enabled only for 64-bit. However, 64-bit configuration also enables ARCH_SPARSEMEM_DEFAULT and there is no ARCH_FLATMEM_ENABLE in arch/sparc/Kconfig. With such settings, the dependencies in mm/Kconfig are always evaluated to SPARSEMEM=y for 64-bit and to FLATMEM=y for 32-bit. The ARCH_SELECT_MEMORY_MODEL option in arch/sparc/Kconfig does not affect anything and can be removed. Link: http://lkml.kernel.org/r/1556740577-4140-4-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 26ab6f5bbaaf..8fc95c7809ef 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -300,9 +300,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config ARCH_SELECT_MEMORY_MODEL - def_bool y if SPARC64 - config ARCH_SPARSEMEM_ENABLE def_bool y if SPARC64 select SPARSEMEM_VMEMMAP_ENABLE -- cgit v1.2.3-59-g8ed1b From 26f4c328079d78d6cd0462c53c14ec0b69f4748e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:56:45 -0700 Subject: mm: simplify gup_fast_permitted Pass in the already calculated end value instead of recomputing it, and leave the end > start check in the callers instead of duplicating them in the arch code. Link: http://lkml.kernel.org/r/20190625143715.1689-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 8 +------- arch/x86/include/asm/pgtable_64.h | 8 +------- mm/gup.c | 17 +++++++---------- 3 files changed, 9 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9f0195d5fa16..9b274fcaacb6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1270,14 +1270,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; return end <= current->mm->context.asce_limit; } #define gup_fast_permitted gup_fast_permitted diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0bb566315621..4990d26dfc73 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted -static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long)nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; diff --git a/mm/gup.c b/mm/gup.c index fc704dc37914..84891c06d485 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2123,13 +2123,9 @@ static void gup_pgd_range(unsigned long addr, unsigned long end, * Check if it's allowed to use __get_user_pages_fast() for the range, or * we need to fall back to the slow version: */ -bool gup_fast_permitted(unsigned long start, int nr_pages) +static bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - return end >= start; + return true; } #endif @@ -2150,6 +2146,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; + if (end <= start) + return 0; if (unlikely(!access_ok((void __user *)start, len))) return 0; @@ -2165,7 +2163,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * block IPIs that come from THPs splitting. */ - if (gup_fast_permitted(start, nr_pages)) { + if (gup_fast_permitted(start, end)) { local_irq_save(flags); gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr); local_irq_restore(flags); @@ -2224,13 +2222,12 @@ int get_user_pages_fast(unsigned long start, int nr_pages, len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (nr_pages <= 0) + if (end <= start) return 0; - if (unlikely(!access_ok((void __user *)start, len))) return -EFAULT; - if (gup_fast_permitted(start, nr_pages)) { + if (gup_fast_permitted(start, end)) { local_irq_disable(); gup_pgd_range(addr, end, gup_flags, pages, &nr); local_irq_enable(); -- cgit v1.2.3-59-g8ed1b From 39656e83dab918861931ef96e5c41731b0899e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:56:49 -0700 Subject: mm: lift the x86_32 PAE version of gup_get_pte to common code The split low/high access is the only non-READ_ONCE version of gup_get_pte that did show up in the various arch implemenations. Lift it to common code and drop the ifdef based arch override. Link: http://lkml.kernel.org/r/20190625143715.1689-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/pgtable-3level.h | 47 -------------------------------- arch/x86/kvm/mmu.c | 2 +- mm/Kconfig | 3 +++ mm/gup.c | 51 ++++++++++++++++++++++++++++++++--- 5 files changed, 52 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dce10b18f4bc..71c1f7864434 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -123,6 +123,7 @@ config X86 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index f8b1ad2c3828..e3633795fb22 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ __pteval_swp_offset(pte))) -#define gup_get_pte gup_get_pte -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. What - * we do have is the guarantee that a PTE will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' iff pte_high - * sees 'h'. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have gotten rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. Because get_user_pages_fast() only operates on present ptes - * we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} - #include #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 98f6e4f88b04..4a9c63d1c20a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -650,7 +650,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* * The idea using the light way get the spte on x86_32 guest is from - * gup_get_pte(arch/x86/mm/gup.c). + * gup_get_pte (mm/gup.c). * * An spte tlb flush may be pending, because kvm_set_pte_rmapp * coalesces them and we are running out of the MMU lock. Therefore diff --git a/mm/Kconfig b/mm/Kconfig index ef6efedc5921..53e2ca54b385 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -762,6 +762,9 @@ config GUP_BENCHMARK See tools/testing/selftests/vm/gup_benchmark.c +config GUP_GET_PTE_LOW_HIGH + bool + config ARCH_HAS_PTE_SPECIAL bool diff --git a/mm/gup.c b/mm/gup.c index 84891c06d485..2093283e4933 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1684,17 +1684,60 @@ struct page *get_dump_page(unsigned long addr) * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_GENERIC_GUP +#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +/* + * WARNING: only to be used in the get_user_pages_fast() implementation. + * + * With get_user_pages_fast(), we walk down the pagetables without taking any + * locks. For this we would like to load the pointers atomically, but sometimes + * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What + * we do have is the guarantee that a PTE will only either go from not present + * to present, or present to not present or both -- it will not switch to a + * completely different present page without a TLB flush in between; something + * that we are blocking by holding interrupts off. + * + * Setting ptes from not present to present goes: + * + * ptep->pte_high = h; + * smp_wmb(); + * ptep->pte_low = l; + * + * And present to not present goes: + * + * ptep->pte_low = 0; + * smp_wmb(); + * ptep->pte_high = 0; + * + * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. + * We load pte_high *after* loading pte_low, which ensures we don't see an older + * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't + * picked up a changed pte high. We might have gotten rubbish values from + * pte_low and pte_high, but we are guaranteed that pte_low will not have the + * present bit set *unless* it is 'l'. Because get_user_pages_fast() only + * operates on present ptes we're safe. + */ +static inline pte_t gup_get_pte(pte_t *ptep) +{ + pte_t pte; -#ifndef gup_get_pte + do { + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + } while (unlikely(pte.pte_low != ptep->pte_low)); + + return pte; +} +#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ /* - * We assume that the PTE can be read atomically. If this is not the case for - * your architecture, please provide the helper. + * We require that the PTE can be read atomically. */ static inline pte_t gup_get_pte(pte_t *ptep) { return READ_ONCE(*ptep); } -#endif +#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages) { -- cgit v1.2.3-59-g8ed1b From 446f062bf06c81de85edd279ee179715c83a4270 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:56:52 -0700 Subject: MIPS: use the generic get_user_pages_fast code The mips code is mostly equivalent to the generic one, minus various bugfixes and an arch override for gup_fast_permitted. Note that this defines ARCH_HAS_PTE_SPECIAL for mips as mips has pte_special and pte_mkspecial implemented and used in the existing gup code. They are no-op stubs, though which makes me a little unsure if this is really right thing to do. Note that this also adds back a missing cpu_has_dc_aliases check for __get_user_pages_fast, which the old code was only doing for get_user_pages_fast. This clearly looks like an oversight, as any condition that makes get_user_pages_fast unsafe also applies to __get_user_pages_fast. [hch@lst.de: MIPS: don't select ARCH_HAS_PTE_SPECIAL] Link: http://lkml.kernel.org/r/20190701151818.32227-3-hch@lst.de Link: http://lkml.kernel.org/r/20190625143715.1689-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Tested-by: Guenter Roeck Cc: Ralf Baechle Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/Kconfig | 2 + arch/mips/include/asm/pgtable.h | 3 + arch/mips/mm/Makefile | 1 - arch/mips/mm/gup.c | 303 ---------------------------------------- 4 files changed, 5 insertions(+), 304 deletions(-) delete mode 100644 arch/mips/mm/gup.c (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 70d3200476bf..51c05f669d57 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -34,6 +34,7 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL @@ -55,6 +56,7 @@ config MIPS select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER + select HAVE_GENERIC_GUP select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4ccb465ef3f2..7d27194e3b45 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -20,6 +20,7 @@ #include #include #include +#include struct mm_struct; struct vm_area_struct; @@ -626,6 +627,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases) + #include /* diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index f34d7ff5eb60..1e8d335025d7 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -7,7 +7,6 @@ obj-y += cache.o obj-y += context.o obj-y += extable.o obj-y += fault.o -obj-y += gup.o obj-y += init.o obj-y += mmap.o obj-y += page.o diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c deleted file mode 100644 index 4c2b4483683c..000000000000 --- a/arch/mips/mm/gup.c +++ /dev/null @@ -1,303 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for MIPS - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - * Copyright (C) 2011 Ralf Baechle - */ -#include -#include -#include -#include -#include -#include - -#include -#include - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#else - return READ_ONCE(*ptep); -#endif -} - -static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t *ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if (!pte_present(pte) || - pte_special(pte) || (write && !pte_write(pte))) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - SetPageReferenced(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - pte_unmap(ptep - 1); - return 1; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); - page_ref_add(page, nr); - SetPageReferenced(page); -} - -static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pmd; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_huge(pmd))) { - if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages,nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pud; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_huge(pud))) { - if (!gup_huge_pud(pud, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pmd_range(pud, addr, next, write, pages,nr)) - return 0; - } - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok((void __user *)start, len))) - return 0; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch - * size will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @gup_flags: flags modifying pin behaviour - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int ret, nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start || cpu_has_dc_aliases) - goto slow_irqon; - - /* XXX: batch / limit 'nr' */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; -slow: - local_irq_enable(); - -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - pages, gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - return ret; -} -- cgit v1.2.3-59-g8ed1b From 2f85e7f948a2c9f7e1524397d1818191ff5abb03 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:56:56 -0700 Subject: sh: add the missing pud_page definition sh only had pud_page_vaddr, but not pud_page. [hch@lst.de: sh: stub out pud_page] Link: http://lkml.kernel.org/r/20190701151818.32227-2-hch@lst.de Link: http://lkml.kernel.org/r/20190625143715.1689-6-hch@lst.de Signed-off-by: Christoph Hellwig Tested-by: Guenter Roeck Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Jason Gunthorpe Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgtable-3level.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 7d8587eb65ff..779260b721ca 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -38,6 +38,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud) return pud_val(pud); } +/* only used by the stubbed out hugetlb gup code, should never be called */ +#define pud_page(pud) NULL + #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { -- cgit v1.2.3-59-g8ed1b From 3c9b9accad9f25a52438c790f76b08d79051d383 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:00 -0700 Subject: sh: use the generic get_user_pages_fast code The sh code is mostly equivalent to the generic one, minus various bugfixes and two arch overrides that this patch adds to pgtable.h. Link: http://lkml.kernel.org/r/20190625143715.1689-7-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Jason Gunthorpe Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/Kconfig | 2 + arch/sh/include/asm/pgtable.h | 37 ++++++ arch/sh/mm/Makefile | 2 +- arch/sh/mm/gup.c | 277 ------------------------------------------ 4 files changed, 40 insertions(+), 278 deletions(-) delete mode 100644 arch/sh/mm/gup.c (limited to 'arch') diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c7c99e18d5ff..02ae8132ebfe 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -15,6 +15,7 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE + select HAVE_GENERIC_GUP select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_GCOV_PROFILE_ALL @@ -64,6 +65,7 @@ config SUPERH config SUPERH32 def_bool "$(ARCH)" = "sh" select ARCH_32BIT_OFF_T + select GUP_GET_PTE_LOW_HIGH if X2TLB select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_IOREMAP_PROT if MMU && !X2TLB diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 3587103afe59..9085d1142fa3 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -149,6 +149,43 @@ extern void paging_init(void); extern void page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd); +static inline bool __pte_access_permitted(pte_t pte, u64 prot) +{ + return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot; +} + +#ifdef CONFIG_X2TLB +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT; + + prot |= _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); + if (write) + prot |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); + return __pte_access_permitted(pte, prot); +} +#elif defined(CONFIG_SUPERH64) +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; + + if (write) + prot |= _PAGE_WRITE; + return __pte_access_permitted(pte, prot); +} +#else +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT | _PAGE_USER; + + if (write) + prot |= _PAGE_RW; + return __pte_access_permitted(pte, prot); +} +#endif + +#define pte_access_permitted pte_access_permitted + /* arch/sh/mm/mmap.c */ #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index fbe5e79751b3..5051b38fd5b6 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -17,7 +17,7 @@ cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o obj-y += $(cacheops-y) mmu-y := nommu.o extable_32.o -mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \ +mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o ioremap.o kmap.o \ pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o obj-y += $(mmu-y) diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c deleted file mode 100644 index 277c882f7489..000000000000 --- a/arch/sh/mm/gup.c +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for SuperH - * - * Copyright (C) 2009 - 2010 Paul Mundt - * - * Cloned from the x86 and PowerPC versions, by: - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#include -#include -#include -#include -#include - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#ifndef CONFIG_X2TLB - return READ_ONCE(*ptep); -#else - /* - * With get_user_pages_fast, we walk down the pagetables without - * taking any locks. For this we would like to load the pointers - * atomically, but that is not possible with 64-bit PTEs. What - * we do have is the guarantee that a pte will only either go - * from not present to present, or present to not present or both - * -- it will not switch to a completely different present page - * without a TLB flush in between; something that we are blocking - * by holding interrupts off. - * - * Setting ptes from not present to present goes: - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees l iff pte_high - * sees h. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have got rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. And get_user_pages_fast only operates on present ptes, so - * we're safe. - * - * gup_get_pte should not be used or copied outside gup.c without being - * very careful -- it does not atomically load the pte or anything that - * is likely to be useful for you. - */ - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#endif -} - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - u64 mask, result; - pte_t *ptep; - -#ifdef CONFIG_X2TLB - result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); - if (write) - result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); -#elif defined(CONFIG_SUPERH64) - result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; - if (write) - result |= _PAGE_WRITE; -#else - result = _PAGE_PRESENT | _PAGE_USER; - if (write) - result |= _PAGE_RW; -#endif - - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if ((pte_val(pte) & mask) != result) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - __flush_anon_page(page, addr); - flush_dcache_page(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok((void __user *)start, len))) - return 0; - - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @gup_flags: flags modifying pin behaviour - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start) - goto slow_irqon; - - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, pages, - gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} -- cgit v1.2.3-59-g8ed1b From d85507901f6a0a4fbd04cbdd567f4798a695b6d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:03 -0700 Subject: sparc64: add the missing pgd_page definition sparc64 only had pgd_page_vaddr, but not pgd_page. [hch@lst.de: fix sparc64 build] Link: http://lkml.kernel.org/r/20190626131318.GA5101@lst.de Link: http://lkml.kernel.org/r/20190625143715.1689-8-hch@lst.de Signed-off-by: Christoph Hellwig Cc: David Miller Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: James Hogan Cc: Jason Gunthorpe Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_64.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 22500c3be7a9..439129af9266 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -864,6 +864,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #define pgd_present(pgd) (pgd_val(pgd) != 0U) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +/* only used by the stubbed out hugetlb gup code, should never be called */ +#define pgd_page(pgd) NULL + static inline unsigned long pud_large(pud_t pud) { pte_t pte = __pte(pud_val(pud)); -- cgit v1.2.3-59-g8ed1b From 5875509d2f30eb8963163f255ded98095989142f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:07 -0700 Subject: sparc64: define untagged_addr() Add a helper to untag a user pointer. This is needed for ADI support in get_user_pages_fast. Link: http://lkml.kernel.org/r/20190625143715.1689-9-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Khalid Aziz Cc: David Miller Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: James Hogan Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_64.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 439129af9266..8358ba00aa5d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1078,6 +1078,28 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, } #define io_remap_pfn_range io_remap_pfn_range +static inline unsigned long untagged_addr(unsigned long start) +{ + if (adi_capable()) { + long addr = start; + + /* If userspace has passed a versioned address, kernel + * will not find it in the VMAs since it does not store + * the version tags in the list of VMAs. Storing version + * tags in list of VMAs is impractical since they can be + * changed any time from userspace without dropping into + * kernel. Any address search in VMAs will be done with + * non-versioned addresses. Ensure the ADI version bits + * are dropped here by sign extending the last bit before + * ADI bits. IOMMU does not implement version tags. + */ + return (addr << (long)adi_nbits()) >> (long)adi_nbits(); + } + + return start; +} +#define untagged_addr untagged_addr + #include #include -- cgit v1.2.3-59-g8ed1b From 7b9afb86b6328f10dc2cad9223d7def12d60e505 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:11 -0700 Subject: sparc64: use the generic get_user_pages_fast code The sparc64 code is mostly equivalent to the generic one, minus various bugfixes and two arch overrides that this patch adds to pgtable.h. Link: http://lkml.kernel.org/r/20190625143715.1689-10-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Khalid Aziz Cc: David Miller Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: James Hogan Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/Kconfig | 1 + arch/sparc/include/asm/pgtable_64.h | 18 ++ arch/sparc/mm/Makefile | 2 +- arch/sparc/mm/gup.c | 340 ------------------------------------ 4 files changed, 20 insertions(+), 341 deletions(-) delete mode 100644 arch/sparc/mm/gup.c (limited to 'arch') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8fc95c7809ef..c59464755764 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -28,6 +28,7 @@ config SPARC select RTC_DRV_M48T59 select RTC_SYSTOHC select HAVE_ARCH_JUMP_LABEL if SPARC64 + select HAVE_GENERIC_GUP if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 8358ba00aa5d..1599de730532 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1100,6 +1100,24 @@ static inline unsigned long untagged_addr(unsigned long start) } #define untagged_addr untagged_addr +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot; + + if (tlb_type == hypervisor) { + prot = _PAGE_PRESENT_4V | _PAGE_P_4V; + if (write) + prot |= _PAGE_WRITE_4V; + } else { + prot = _PAGE_PRESENT_4U | _PAGE_P_4U; + if (write) + prot |= _PAGE_WRITE_4U; + } + + return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot; +} +#define pte_access_permitted pte_access_permitted + #include #include diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index d39075b1e3b7..b078205b70e0 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -5,7 +5,7 @@ asflags-y := -ansi ccflags-y := -Werror -obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o +obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c deleted file mode 100644 index 1e770a517d4a..000000000000 --- a/arch/sparc/mm/gup.c +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for sparc, cribbed from powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - if (tlb_type == hypervisor) { - result = _PAGE_PRESENT_4V|_PAGE_P_4V; - if (write) - result |= _PAGE_WRITE_4V; - } else { - result = _PAGE_PRESENT_4U|_PAGE_P_4U; - if (write) - result |= _PAGE_WRITE_4U; - } - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - struct page *page, *head; - pte_t pte = *ptep; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - /* The hugepage case is simplified on sparc64 because - * we encode the sub-page pfn offsets into the - * hugepage PTEs. We could optimize this in the future - * use page_cache_add_speculative() for the hugepage case. - */ - page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(head); - return 0; - } - - pages[*nr] = page; - (*nr)++; - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, - int *nr) -{ - struct page *head, *page; - int refs; - - if (!(pmd_val(pmd) & _PAGE_VALID)) - return 0; - - if (write && !pmd_write(pmd)) - return 0; - - refs = 0; - page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - head = compound_head(page); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, - int *nr) -{ - struct page *head, *page; - int refs; - - if (!(pud_val(pud) & _PAGE_VALID)) - return 0; - - if (write && !pud_write(pud)) - return 0; - - refs = 0; - page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - head = compound_head(page); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pud_val(pud) != pud_val(*pudp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - if (!gup_huge_pmd(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, - pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pudp, pud, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next, flags; - pgd_t *pgdp; - int nr = 0; - -#ifdef CONFIG_SPARC64 - if (adi_capable()) { - long addr = start; - - /* If userspace has passed a versioned address, kernel - * will not find it in the VMAs since it does not store - * the version tags in the list of VMAs. Storing version - * tags in list of VMAs is impractical since they can be - * changed any time from userspace without dropping into - * kernel. Any address search in VMAs will be done with - * non-versioned addresses. Ensure the ADI version bits - * are dropped here by sign extending the last bit before - * ADI bits. IOMMU does not implement version tags. - */ - addr = (addr << (long)adi_nbits()) >> (long)adi_nbits(); - start = addr; - } -#endif - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - -#ifdef CONFIG_SPARC64 - if (adi_capable()) { - long addr = start; - - /* If userspace has passed a versioned address, kernel - * will not find it in the VMAs since it does not store - * the version tags in the list of VMAs. Storing version - * tags in list of VMAs is impractical since they can be - * changed any time from userspace without dropping into - * kernel. Any address search in VMAs will be done with - * non-versioned addresses. Ensure the ADI version bits - * are dropped here by sign extending the last bit before - * ADI bits. IOMMU does not implements version tags, - */ - addr = (addr << (long)adi_nbits()) >> (long)adi_nbits(); - start = addr; - } -#endif - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on sparc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. - */ - local_irq_disable(); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, pages, - gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} -- cgit v1.2.3-59-g8ed1b From 67a929e097b774c69253c8b61ef9eb8a42b463a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:14 -0700 Subject: mm: rename CONFIG_HAVE_GENERIC_GUP to CONFIG_HAVE_FAST_GUP We only support the generic GUP now, so rename the config option to be more clear, and always use the mm/Kconfig definition of the symbol and select it from the arch Kconfigs. Link: http://lkml.kernel.org/r/20190625143715.1689-11-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Khalid Aziz Reviewed-by: Jason Gunthorpe Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 5 +---- arch/arm64/Kconfig | 4 +--- arch/mips/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/s390/Kconfig | 2 +- arch/sh/Kconfig | 2 +- arch/sparc/Kconfig | 2 +- arch/x86/Kconfig | 4 +--- mm/Kconfig | 2 +- mm/gup.c | 4 ++-- 10 files changed, 11 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0c55aa199c67..2bf1ce39a96d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -75,6 +75,7 @@ config ARM select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD + select HAVE_FAST_GUP if ARM_LPAE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL @@ -1625,10 +1626,6 @@ config ARCH_SPARSEMEM_DEFAULT config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM -config HAVE_GENERIC_GUP - def_bool y - depends on ARM_LPAE - config HIGHMEM bool "High Memory Support" depends on MMU diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c085aec9459b..a36ff61321ce 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,7 @@ config ARM64 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -267,9 +268,6 @@ config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y -config HAVE_GENERIC_GUP - def_bool y - config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 51c05f669d57..7957d3457156 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -53,10 +53,10 @@ config MIPS select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EXIT_THREAD + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_GENERIC_GUP select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3b795a0cab62..959866c156de 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -185,12 +185,12 @@ config PPC select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_EBPF_JIT if PPC64 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC - select HAVE_GENERIC_GUP select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f089ae375f6b..5d8570ed6cab 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -139,6 +139,7 @@ config S390 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_FAST_GUP select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD @@ -146,7 +147,6 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS - select HAVE_GENERIC_GUP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 02ae8132ebfe..31a7d12db705 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -15,7 +15,7 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE - select HAVE_GENERIC_GUP + select HAVE_FAST_GUP if MMU select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c59464755764..e9f5d62e9817 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -28,7 +28,7 @@ config SPARC select RTC_DRV_M48T59 select RTC_SYSTOHC select HAVE_ARCH_JUMP_LABEL if SPARC64 - select HAVE_GENERIC_GUP if SPARC64 + select HAVE_FAST_GUP if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 71c1f7864434..9df2d1cb7a9e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -159,6 +159,7 @@ config X86 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA select HAVE_EXIT_THREAD + select HAVE_FAST_GUP select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER @@ -2907,9 +2908,6 @@ config HAVE_ATOMIC_IOMAP config X86_DEV_DMA_OPS bool -config HAVE_GENERIC_GUP - def_bool y - source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" diff --git a/mm/Kconfig b/mm/Kconfig index 53e2ca54b385..b5a258d62465 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -132,7 +132,7 @@ config HAVE_MEMBLOCK_NODE_MAP config HAVE_MEMBLOCK_PHYS_MAP bool -config HAVE_GENERIC_GUP +config HAVE_FAST_GUP bool config ARCH_KEEP_MEMBLOCK diff --git a/mm/gup.c b/mm/gup.c index 2093283e4933..2f69b30a11d3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1651,7 +1651,7 @@ struct page *get_dump_page(unsigned long addr) #endif /* CONFIG_ELF_CORE */ /* - * Generic Fast GUP + * Fast GUP * * get_user_pages_fast attempts to pin user pages by walking the page * tables directly and avoids taking locks. Thus the walker needs to be @@ -1683,7 +1683,7 @@ struct page *get_dump_page(unsigned long addr) * * This code is based heavily on the PowerPC implementation by Nick Piggin. */ -#ifdef CONFIG_HAVE_GENERIC_GUP +#ifdef CONFIG_HAVE_FAST_GUP #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH /* * WARNING: only to be used in the get_user_pages_fast() implementation. -- cgit v1.2.3-59-g8ed1b From cbd34da7dc9afd521e0bea5e7d12701f4a9da7c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jul 2019 20:57:28 -0700 Subject: mm: move the powerpc hugepd code to mm/gup.c While only powerpc supports the hugepd case, the code is pretty generic and I'd like to keep all GUP internals in one place. Link: http://lkml.kernel.org/r/20190625143715.1689-15-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Andrey Konovalov Cc: Benjamin Herrenschmidt Cc: David Miller Cc: James Hogan Cc: Jason Gunthorpe Cc: Khalid Aziz Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Paul Burton Cc: Paul Mackerras Cc: Ralf Baechle Cc: Rich Felker Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 1 + arch/powerpc/mm/hugetlbpage.c | 72 ------------------------------------- include/linux/hugetlb.h | 18 ---------- mm/Kconfig | 10 ++++++ mm/gup.c | 82 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 93 insertions(+), 90 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 959866c156de..24a41f919309 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -125,6 +125,7 @@ config PPC select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV + select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b5d92dc32844..51716c11d0fb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -511,13 +511,6 @@ retry: return page; } -static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, - unsigned long sz) -{ - unsigned long __boundary = (addr + sz) & ~(sz-1); - return (__boundary - 1 < end - 1) ? __boundary : end; -} - #ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -665,68 +658,3 @@ void flush_dcache_icache_hugepage(struct page *page) } } } - -static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long pte_end; - struct page *head, *page; - pte_t pte; - int refs; - - pte_end = (addr + sz) & ~(sz-1); - if (pte_end < end) - end = pte_end; - - pte = READ_ONCE(*ptep); - - if (!pte_access_permitted(pte, write)) - return 0; - - /* hugepages are never "special" */ - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - /* Could be optimized better */ - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, - unsigned long end, int write, struct page **pages, int *nr) -{ - pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(hugepd); - unsigned long next; - - ptep = hugepte_offset(hugepd, addr, pdshift); - do { - next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) - return 0; - } while (ptep++, addr = next, addr != end); - - return 1; -} diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f895a79c6f5c..edfca4278319 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -16,29 +16,11 @@ struct user_struct; struct mmu_gather; #ifndef is_hugepd -/* - * Some architectures requires a hugepage directory format that is - * required to support multiple hugepage sizes. For example - * a4fe3ce76 "powerpc/mm: Allow more flexible layouts for hugepage pagetables" - * introduced the same on powerpc. This allows for a more flexible hugepage - * pagetable layout. - */ typedef struct { unsigned long pd; } hugepd_t; #define is_hugepd(hugepd) (0) #define __hugepd(x) ((hugepd_t) { (x) }) -static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned pdshift, unsigned long end, - int write, struct page **pages, int *nr) -{ - return 0; -} -#else -extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr, - unsigned pdshift, unsigned long end, - int write, struct page **pages, int *nr); #endif - #ifdef CONFIG_HUGETLB_PAGE #include diff --git a/mm/Kconfig b/mm/Kconfig index 48840b28482b..0b4352557dd5 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -769,4 +769,14 @@ config GUP_GET_PTE_LOW_HIGH config ARCH_HAS_PTE_SPECIAL bool +# +# Some architectures require a special hugepage directory format that is +# required to support multiple hugepage sizes. For example a4fe3ce76 +# "powerpc/mm: Allow more flexible layouts for hugepage pagetables" +# introduced it on powerpc. This allows for a more flexible hugepage +# pagetable layouts. +# +config ARCH_HAS_HUGEPD + bool + endmenu diff --git a/mm/gup.c b/mm/gup.c index 9d68cef2fa90..2f8bf7a71c74 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1966,6 +1966,88 @@ static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, } #endif +#ifdef CONFIG_ARCH_HAS_HUGEPD +static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, + unsigned long sz) +{ + unsigned long __boundary = (addr + sz) & ~(sz-1); + return (__boundary - 1 < end - 1) ? __boundary : end; +} + +static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long pte_end; + struct page *head, *page; + pte_t pte; + int refs; + + pte_end = (addr + sz) & ~(sz-1); + if (pte_end < end) + end = pte_end; + + pte = READ_ONCE(*ptep); + + if (!pte_access_permitted(pte, write)) + return 0; + + /* hugepages are never "special" */ + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + + page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + /* Could be optimized better */ + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + return 1; +} + +static int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned int pdshift, unsigned long end, int write, + struct page **pages, int *nr) +{ + pte_t *ptep; + unsigned long sz = 1UL << hugepd_shift(hugepd); + unsigned long next; + + ptep = hugepte_offset(hugepd, addr, pdshift); + do { + next = hugepte_addr_end(addr, end, sz); + if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) + return 0; + } while (ptep++, addr = next, addr != end); + + return 1; +} +#else +static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr, + unsigned pdshift, unsigned long end, int write, + struct page **pages, int *nr) +{ + return 0; +} +#endif /* CONFIG_ARCH_HAS_HUGEPD */ + static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { -- cgit v1.2.3-59-g8ed1b From 5fba4af4456b5d3f982d4ac1c879d16b36aaa0fb Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:57:49 -0700 Subject: asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel] Most architectures have identical or very similar implementation of pte_alloc_one_kernel(), pte_alloc_one(), pte_free_kernel() and pte_free(). Add a generic implementation that can be reused across architectures and enable its use on x86. The generic implementation uses GFP_KERNEL | __GFP_ZERO for the kernel page tables and GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT for the user page tables. The "base" functions for PTE allocation, namely __pte_alloc_one_kernel() and __pte_alloc_one() are intended for the architectures that require additional actions after actual memory allocation or must use non-default GFP flags. x86 is switched to use generic pte_alloc_one_kernel(), pte_free_kernel() and pte_free(). x86 still implements pte_alloc_one() to allow run-time control of GFP flags required for "userpte" command line option. Link: http://lkml.kernel.org/r/1557296232-15361-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgalloc.h | 19 ++------ arch/x86/mm/pgtable.c | 33 ++++--------- include/asm-generic/pgalloc.h | 107 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 115 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a281e61ec60c..29aa7859bdee 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,9 @@ #include /* for struct page */ #include +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include /* for pte_{alloc,free}_one */ + static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL @@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp; extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); extern pgtable_t pte_alloc_one(struct mm_struct *); -/* Should really implement gc for free page table pages. This could be - done with a reference count in struct page. */ - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1f67b1e15bf6..44816ff6411f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif -#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) - #ifdef CONFIG_HIGHPTE -#define PGALLOC_USER_GFP __GFP_HIGHMEM +#define PGTABLE_HIGHMEM __GFP_HIGHMEM #else -#define PGALLOC_USER_GFP 0 +#define PGTABLE_HIGHMEM 0 #endif -gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT); -} +gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *pte; - - pte = alloc_pages(__userpte_alloc_gfp, 0); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; + return __pte_alloc_one(mm, __userpte_alloc_gfp); } static int __init setup_userpte(char *arg) @@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; - gfp_t gfp = PGALLOC_GFP; + gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; @@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return (pgd_t *)__get_free_pages(PGALLOC_GFP, + return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate * a 32-byte slab for pgd to save memory space. */ - return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } static inline void _pgd_free(pgd_t *pgd) @@ -424,7 +408,8 @@ void __init pgd_cache_init(void) static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); + return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, + PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 948714c1535a..8476175c07e7 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -1,13 +1,112 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_GENERIC_PGALLOC_H #define __ASM_GENERIC_PGALLOC_H -/* - * an empty file is enough for a nommu architecture - */ + #ifdef CONFIG_MMU -#error need to implement an architecture specific asm/pgalloc.h + +#define GFP_PGTABLE_KERNEL (GFP_KERNEL | __GFP_ZERO) +#define GFP_PGTABLE_USER (GFP_PGTABLE_KERNEL | __GFP_ACCOUNT) + +/** + * __pte_alloc_one_kernel - allocate a page for PTE-level kernel page table + * @mm: the mm_struct of the current context + * + * This function is intended for architectures that need + * anything beyond simple page allocation. + * + * Return: pointer to the allocated memory or %NULL on error + */ +static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)__get_free_page(GFP_PGTABLE_KERNEL); +} + +#ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +/** + * pte_alloc_one_kernel - allocate a page for PTE-level kernel page table + * @mm: the mm_struct of the current context + * + * Return: pointer to the allocated memory or %NULL on error + */ +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return __pte_alloc_one_kernel(mm); +} +#endif + +/** + * pte_free_kernel - free PTE-level kernel page table page + * @mm: the mm_struct of the current context + * @pte: pointer to the memory containing the page table + */ +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +/** + * __pte_alloc_one - allocate a page for PTE-level user page table + * @mm: the mm_struct of the current context + * @gfp: GFP flags to use for the allocation + * + * Allocates a page and runs the pgtable_page_ctor(). + * + * This function is intended for architectures that need + * anything beyond simple page allocation or must have custom GFP flags. + * + * Return: `struct page` initialized as page table or %NULL on error + */ +static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) +{ + struct page *pte; + + pte = alloc_page(gfp); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } + + return pte; +} + +#ifndef __HAVE_ARCH_PTE_ALLOC_ONE +/** + * pte_alloc_one - allocate a page for PTE-level user page table + * @mm: the mm_struct of the current context + * + * Allocates a page and runs the pgtable_page_ctor(). + * + * Return: `struct page` initialized as page table or %NULL on error + */ +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return __pte_alloc_one(mm, GFP_PGTABLE_USER); +} #endif +/* + * Should really implement gc for free page table pages. This could be + * done with a reference count in struct page. + */ + +/** + * pte_free - free PTE-level user page table page + * @mm: the mm_struct of the current context + * @pte_page: the `struct page` representing the page table + */ +static inline void pte_free(struct mm_struct *mm, struct page *pte_page) +{ + pgtable_page_dtor(pte_page); + __free_page(pte_page); +} + +#else /* CONFIG_MMU */ + +/* This is enough for a nommu architecture */ #define check_pgt_cache() do { } while (0) +#endif /* CONFIG_MMU */ + #endif /* __ASM_GENERIC_PGALLOC_H */ -- cgit v1.2.3-59-g8ed1b From bc3ace9b520f97d5650d096a5f95cac3fa64e204 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:57:53 -0700 Subject: alpha: switch to generic version of pte allocation alpha allocates PTE pages with __get_free_page() and uses GFP_KERNEL | __GFP_ZERO for the allocations. Switch it to the generic version that does exactly the same thing for the kernel page tables and adds __GFP_ACCOUNT for the user PTEs. The alpha pte_free() and pte_free_kernel() versions are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-3-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 40 +++------------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index 02f9f91bb4f0..71ded3b7d82d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -5,6 +5,8 @@ #include #include +#include /* for pte_{alloc,free}_one */ + /* * Allocate and free page tables. The xxx_kernel() versions are * used to allocate a kernel page table - this turns on ASN bits @@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER); return ret; } @@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -static inline void -pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - pte_t *pte = pte_alloc_one_kernel(mm); - struct page *page; - - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline void -pte_free(struct mm_struct *mm, pgtable_t page) -{ - pgtable_page_dtor(page); - __free_page(page); -} - #define check_pgt_cache() do { } while (0) #endif /* _ALPHA_PGALLOC_H */ -- cgit v1.2.3-59-g8ed1b From 28bcf5937536062d96ee0b581a76a0b1b652eec6 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:57:57 -0700 Subject: arm: switch to generic version of pte allocation Replace __get_free_page() and alloc_pages() calls with the generic __pte_alloc_one_kernel() and __pte_alloc_one(). There is no functional change for the kernel PTE allocation. The difference for the user PTEs, is that the clear_pte_table() is now called after pgtable_page_ctor() and the addition of __GFP_ACCOUNT to the GFP flags. The conversion to the generic version of pte_free_kernel() removes the NULL check for pte. The pte_free() version on arm is identical to the generic one and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-4-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgalloc.h | 41 +++++++++++++---------------------------- arch/arm/mm/mmu.c | 2 +- 2 files changed, 14 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index c038cff6fdd3..a2a68b751971 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -54,8 +54,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) - static inline void clean_pte_table(pte_t *pte) { clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE); @@ -77,54 +75,41 @@ static inline void clean_pte_table(pte_t *pte) * | h/w pt 1 | * +------------+ */ + +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include + static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; + pte_t *pte = __pte_alloc_one_kernel(mm); - pte = (pte_t *)__get_free_page(PGALLOC_GFP); if (pte) clean_pte_table(pte); return pte; } +#ifdef CONFIG_HIGHPTE +#define PGTABLE_HIGHMEM __GFP_HIGHMEM +#else +#define PGTABLE_HIGHMEM 0 +#endif + static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { struct page *pte; -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); -#else - pte = alloc_pages(PGALLOC_GFP, 0); -#endif + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM); if (!pte) return NULL; if (!PageHighMem(pte)) clean_pte_table(page_address(pte)); - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } return pte; } -/* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - if (pte) - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, pmdval_t prot) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 1aa2586fa597..d9a0038774a6 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -729,7 +729,7 @@ static void __init *early_alloc(unsigned long sz) static void *__init late_alloc(unsigned long sz) { - void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); + void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz)); if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) BUG(); -- cgit v1.2.3-59-g8ed1b From 50f11a8a4620eee6b6831e69ab5d42456546d7d8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:02 -0700 Subject: arm64: switch to generic version of pte allocation The PTE allocations in arm64 are identical to the generic ones modulo the GFP flags. Using the generic pte_alloc_one() functions ensures that the user page tables are allocated with __GFP_ACCOUNT set. The arm64 definition of PGALLOC_GFP is removed and replaced with GFP_PGTABLE_USER for p[gum]d_alloc_one() for the user page tables and GFP_PGTABLE_KERNEL for the kernel page tables. The KVM memory cache is now using GFP_PGTABLE_USER. The mappings created with create_pgd_mapping() are now using GFP_PGTABLE_KERNEL. The conversion to the generic version of pte_free_kernel() removes the NULL check for pte. The pte_free() version on arm64 is identical to the generic one and can be simply dropped. [cai@lca.pw: fix a bogus GFP flag in pgd_alloc()] Link: https://lore.kernel.org/r/1559656836-24940-1-git-send-email-cai@lca.pw/ [and fix it more] Link: https://lore.kernel.org/linux-mm/20190617151252.GF16810@rapoport-lnx/ Link: http://lkml.kernel.org/r/1557296232-15361-5-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/pgalloc.h | 47 +++++++--------------------------------- arch/arm64/mm/mmu.c | 2 +- arch/arm64/mm/pgd.c | 6 +++-- virt/kvm/arm/mmu.c | 2 +- 4 files changed, 14 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index cdced518378d..14d0bc44d451 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -13,18 +13,23 @@ #include #include +#include /* for pte_{alloc,free}_one */ + #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { + gfp_t gfp = GFP_PGTABLE_USER; struct page *page; - page = alloc_page(PGALLOC_GFP); + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + + page = alloc_page(gfp); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -61,7 +66,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)__get_free_page(PGALLOC_GFP); + return (pud_t *)__get_free_page(GFP_PGTABLE_USER); } static inline void pud_free(struct mm_struct *mm, pud_t *pudp) @@ -89,42 +94,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(PGALLOC_GFP); -} - -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(PGALLOC_GFP, 0); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -/* - * Free a PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep) -{ - if (ptep) - free_page((unsigned long)ptep); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3645f29bd814..1b49c08dfa2b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -362,7 +362,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, static phys_addr_t __pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); BUG_ON(!ptr); /* Ensure the zeroed page is visible to the page table walker */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 9a0c7d5090d6..7548f9ca1f11 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -19,10 +19,12 @@ static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { + gfp_t gfp = GFP_PGTABLE_USER; + if (PGD_SIZE == PAGE_SIZE) - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_page(gfp); else - return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cache, gfp); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 198e5171e1f7..38b4c910b6c3 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -129,7 +129,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, if (cache->nobjs >= min) return 0; while (cache->nobjs < max) { - page = (void *)__get_free_page(PGALLOC_GFP); + page = (void *)__get_free_page(GFP_PGTABLE_USER); if (!page) return -ENOMEM; cache->objects[cache->nobjs++] = page; -- cgit v1.2.3-59-g8ed1b From bd5ff066514c2dcffd443cfaa55580db0f19caf8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:06 -0700 Subject: csky: switch to generic version of pte allocation The csky implementation pte_alloc_one(), pte_free_kernel() and pte_free() is identical to the generic except of lack of __GFP_ACCOUNT for the user PTEs allocation. Switch csky to use generic version of these functions. The csky implementation of pte_alloc_one_kernel() is not replaced because it does not clear the allocated page but rather sets each PTE in it to a non-zero value. The pte_free_kernel() and pte_free() versions on csky are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-6-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Guo Ren Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/csky/include/asm/pgalloc.h | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index d213bb47b717..98c5716708d6 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -8,6 +8,9 @@ #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#include /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) return pte; } -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - if (!pte) - return NULL; - - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_pages((unsigned long)pgd, PGD_ORDER); -- cgit v1.2.3-59-g8ed1b From 14c0a39c9af9a25e0f94f9be89431c2debb34f2c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:10 -0700 Subject: m68k: sun3: switch to generic version of pte allocation The sun3 MMU variant of m68k uses GFP_KERNEL to allocate a PTE page and then memset(0) or clear_highpage() to clear it. This is equivalent to allocating the page with GFP_KERNEL | __GFP_ZERO, which allows replacing sun3 implementation of pte_alloc_one() and pte_alloc_one_kernel() with the generic ones. The pte_free() and pte_free_kernel() versions are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-8-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/sun3_pgalloc.h | 41 ++---------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 1456c5eecbd9..1a8ddbd0d23c 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -13,55 +13,18 @@ #include +#include /* for pte_{alloc,free}_one */ + extern const char bad_pmd_string[]; #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long) pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t page) -{ - pgtable_page_dtor(page); - __free_page(page); -} - #define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ } while (0) -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - unsigned long page = __get_free_page(GFP_KERNEL); - - if (!page) - return NULL; - - memset((void *)page, 0, PAGE_SIZE); - return (pte_t *) (page); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 0); - - if (page == NULL) - return NULL; - - clear_highpage(page); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; - -} - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { pmd_val(*pmd) = __pa((unsigned long)pte); -- cgit v1.2.3-59-g8ed1b From b7902ce175476b767e6c614fded293faf906deee Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:14 -0700 Subject: mips: switch to generic version of pte allocation MIPS allocates kernel PTE pages with __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER) and user PTE pages with pte = alloc_pages(GFP_KERNEL, PTE_ORDER) and then uses clear_highpage(pte) to zero out the allocated page for the user page tables. The PTE_ORDER is hardwired to zero, which makes MIPS implementation almost identical to the generic one. Switch MIPS to the generic version that does exactly the same thing for the kernel page tables and adds __GFP_ACCOUNT for the user PTEs. The pte_free_kernel() and pte_free() versions on mips are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-9-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Paul Burton Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgalloc.h | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 27808d9461f4..aa16b85ddffc 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -13,6 +13,8 @@ #include #include +#include /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_pages((unsigned long)pgd, PGD_ORDER); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER); -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL, PTE_ORDER); - if (!pte) - return NULL; - clear_highpage(pte); - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_page_dtor(pte); \ -- cgit v1.2.3-59-g8ed1b From f52a8e1a67cde67c33d5c2eabd6494dcab956677 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:18 -0700 Subject: nds32: switch to generic version of pte allocation The nds32 implementation of pte_alloc_one_kernel() differs from the generic in the use of __GFP_RETRY_MAYFAIL flag, which is removed after the conversion. The nds32 version of pte_alloc_one() missed the call to pgtable_page_ctor() and also used __GFP_RETRY_MAYFAIL. Switching it to use generic __pte_alloc_one() for the PTE page allocation ensures that page table constructor is run and the user page tables are allocated with __GFP_ACCOUNT. The conversion to the generic version of pte_free_kernel() removes the NULL check for pte. The pte_free() version on nds32 is identical to the generic one and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-10-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nds32/include/asm/pgalloc.h | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h index 3cbc749c79aa..e78b43d8389f 100644 --- a/arch/nds32/include/asm/pgalloc.h +++ b/arch/nds32/include/asm/pgalloc.h @@ -9,6 +9,9 @@ #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include /* for pte_{alloc,free}_one */ + /* * Since we have only two-level page tables, these are trivial */ @@ -22,43 +25,17 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd); #define check_pgt_cache() do { } while (0) -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = - (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | - __GFP_ZERO); - - return pte; -} - static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { pgtable_t pte; - pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0); + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER); if (pte) cpu_dcache_wb_page((unsigned long)page_address(pte)); return pte; } -/* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte) -{ - if (pte) { - free_page((unsigned long)pte); - } -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - __free_page(pte); -} - /* * Populate the pmdp entry with a pointer to the pte. This pmd is part * of the mm address space. -- cgit v1.2.3-59-g8ed1b From fc7835c2f8ea800ded22f68bd782cd17a6dd83cd Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:22 -0700 Subject: nios2: switch to generic version of pte allocation nios2 allocates kernel PTE pages with __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER); and user page tables with pte = alloc_pages(GFP_KERNEL, PTE_ORDER); if (pte) clear_highpage(); The PTE_ORDER is hardwired to zero, which makes nios2 implementation almost identical to the generic one. Switch nios2 to the generic version that does exactly the same thing for the kernel page tables and adds __GFP_ACCOUNT for the user PTEs. The pte_free_kernel() and pte_free() versions on nios2 are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-11-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/nios2/include/asm/pgalloc.h | 37 ++----------------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 3a149ead1207..4bc8cf72067e 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -12,6 +12,8 @@ #include +#include /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_pages((unsigned long)pgd, PGD_ORDER); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); - - return pte; -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL, PTE_ORDER); - if (pte) { - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - clear_highpage(pte); - } - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - #define __pte_free_tlb(tlb, pte, addr) \ do { \ pgtable_page_dtor(pte); \ -- cgit v1.2.3-59-g8ed1b From 3f4a13085dd88cb806a2c64fb1286e9cf3a98cd0 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:27 -0700 Subject: parisc: switch to generic version of pte allocation parisc allocates PTE pages with __get_free_page() and uses GFP_KERNEL | __GFP_ZERO for the allocations. Switch it to the generic version that does exactly the same thing for the kernel page tables and adds __GFP_ACCOUNT for the user PTEs. The pte_free_kernel() and pte_free() versions on are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-12-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index ea75cc966dae..4f2059a50fae 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include +#include /* for pte_{alloc,free}_one */ + /* Allocate the top level pgd (page directory) * * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we @@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) pmd_populate_kernel(mm, pmd, page_address(pte_page)) #define pmd_pgtable(pmd) pmd_page(pmd) -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - pte_free_kernel(mm, page_address(pte)); -} - #define check_pgt_cache() do { } while (0) #endif -- cgit v1.2.3-59-g8ed1b From d1b46fe50c8b0e0b4035c48ccd5f655aa7ceea16 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:31 -0700 Subject: riscv: switch to generic version of pte allocation The only difference between the generic and RISC-V implementation of PTE allocation is the usage of __GFP_RETRY_MAYFAIL for both kernel and user PTEs and the absence of __GFP_ACCOUNT for the user PTEs. The conversion to the generic version removes the __GFP_RETRY_MAYFAIL and ensures that GFP_ACCOUNT is used for the user PTE allocations. The pte_free() and pte_free_kernel() versions are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-13-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Palmer Dabbelt Cc: Albert Ou Cc: Anshuman Khandual Cc: Anton Ivanov Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/include/asm/pgalloc.h | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index eb8b0195f27f..56a67d66f72f 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include #include +#include /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -74,33 +76,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #endif /* __PAGETABLE_PMD_FOLDED */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page( - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO); -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO); - if (likely(pte != NULL)) - pgtable_page_ctor(pte); - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb, pte, buf) \ do { \ pgtable_page_dtor(pte); \ -- cgit v1.2.3-59-g8ed1b From f32848e16939e1407ad3413f7faa3e0a8ad802eb Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:35 -0700 Subject: um: switch to generic version of pte allocation um allocates PTE pages with __get_free_page() and uses GFP_KERNEL | __GFP_ZERO for the allocations. Switch it to the generic version that does exactly the same thing for the kernel page tables and adds __GFP_ACCOUNT for the user PTEs. The pte_free() and pte_free_kernel() versions are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-14-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Anton Ivanov Acked-by: Anton Ivanov Cc: Albert Ou Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Vincent Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/asm/pgalloc.h | 16 ++-------------- arch/um/kernel/mem.c | 22 ---------------------- 2 files changed, 2 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 99eb5682792a..d7b282e9c4d5 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include +#include /* for pte_{alloc,free}_one */ + #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte))) @@ -25,20 +27,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); -extern pgtable_t pte_alloc_one(struct mm_struct *); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long) pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb,pte, address) \ do { \ pgtable_page_dtor(pte); \ diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index a9c9a94c096f..de58e976b9bc 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -208,28 +208,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long) pgd); } -pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - #ifdef CONFIG_3_LEVEL_PGTABLES pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { -- cgit v1.2.3-59-g8ed1b From c2471e79a7ea0f48e3ae9253e1f3688a44cc944d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 11 Jul 2019 20:58:39 -0700 Subject: unicore32: switch to generic version of pte allocation Replace __get_free_page() and alloc_pages() calls with the generic __pte_alloc_one_kernel() and __pte_alloc_one(). There is no functional change for the kernel PTE allocation. The difference for the user PTEs, is that the clear_pte_table() is now called after pgtable_page_ctor() and the addition of __GFP_ACCOUNT to the GFP flags. The pte_free() and pte_free_kernel() versions are identical to the generic ones and can be simply dropped. Link: http://lkml.kernel.org/r/1557296232-15361-15-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Cc: Arnd Bergmann Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Guan Xuetao Cc: Guo Ren Cc: Helge Deller Cc: Ley Foon Tan Cc: Matthew Wilcox Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Russell King Cc: Sam Creasey Cc: Ralf Baechle Cc: Vincent Chen Cc: Albert Ou Cc: Anton Ivanov Cc: Guo Ren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/pgalloc.h | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index ec64834b1c6a..3f0903bd98e9 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -14,6 +14,10 @@ #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include + #define check_pgt_cache() do { } while (0) #define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT) @@ -25,17 +29,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) - /* * Allocate one PTE table. */ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; + pte_t *pte = __pte_alloc_one_kernel(mm); - pte = (pte_t *)__get_free_page(PGALLOC_GFP); if (pte) clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t)); @@ -47,35 +48,14 @@ pte_alloc_one(struct mm_struct *mm) { struct page *pte; - pte = alloc_pages(PGALLOC_GFP, 0); + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER); if (!pte) return NULL; - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); - } - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - } - + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); return pte; } -/* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - if (pte) - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval) { set_pmd(pmdp, __pmd(pmdval)); -- cgit v1.2.3-59-g8ed1b From 8b1e0f81fb6fcf3109465a168b2e2da3f711fa86 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 11 Jul 2019 20:58:43 -0700 Subject: mm/pgtable: drop pgtable_t variable from pte_fn_t functions Drop the pgtable_t variable from all implementation for pte_fn_t as none of them use it. apply_to_pte_range() should stop computing it as well. Should help us save some cycles. Link: http://lkml.kernel.org/r/1556803126-26596-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: Matthew Wilcox Cc: Ard Biesheuvel Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Michal Hocko Cc: Logan Gunthorpe Cc: "Kirill A. Shutemov" Cc: Dan Williams Cc: Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/efi.c | 3 +-- arch/arm/mm/dma-mapping.c | 3 +-- arch/arm/mm/pageattr.c | 3 +-- arch/arm64/kernel/efi.c | 3 +-- arch/arm64/mm/pageattr.c | 3 +-- arch/x86/xen/mmu_pv.c | 3 +-- drivers/gpu/drm/i915/i915_mm.c | 3 +-- drivers/xen/gntdev.c | 6 ++---- drivers/xen/privcmd.c | 6 ++---- drivers/xen/xlate_mmu.c | 3 +-- include/linux/mm.h | 3 +-- mm/memory.c | 5 +---- mm/vmalloc.c | 2 +- 13 files changed, 15 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index ed005870671a..e57dbcc89123 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -8,8 +8,7 @@ #include #include -static int __init set_permissions(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; pte_t pte = *ptep; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 439bb6a59a04..1fb5c0ca1ed8 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -493,8 +493,7 @@ void __init dma_contiguous_remap(void) } } -static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, - void *data) +static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data) { struct page *page = virt_to_page(addr); pgprot_t prot = *(pgprot_t *)data; diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 0f5faf30d9bf..d546efad7e97 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -14,8 +14,7 @@ struct page_change_data { pgprot_t clear_mask; }; -static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) { struct page_change_data *cdata = data; pte_t pte = *ptep; diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3c33d0dd8e0e..d0cf596db82c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -82,8 +82,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) return 0; } -static int __init set_permissions(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; pte_t pte = READ_ONCE(*ptep); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index fcdcf6cd7677..03c53f16ee77 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -19,8 +19,7 @@ struct page_change_data { bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED); -static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) { struct page_change_data *cdata = data; pte_t pte = READ_ONCE(*ptep); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index beb44e22afdf..f6e5eeecfc69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2700,8 +2700,7 @@ struct remap_data { struct mmu_update *mmu_update; }; -static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) { struct remap_data *rmd = data; pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index e4935dd1fd37..c23bb29e6d3e 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -35,8 +35,7 @@ struct remap_pfn { pgprot_t prot; }; -static int remap_pfn(pte_t *pte, pgtable_t token, - unsigned long addr, void *data) +static int remap_pfn(pte_t *pte, unsigned long addr, void *data) { struct remap_pfn *r = data; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 469dfbd6cf90..4c339c7e66e5 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -264,8 +264,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) /* ------------------------------------------------------------------ */ -static int find_grant_ptes(pte_t *pte, pgtable_t token, - unsigned long addr, void *data) +static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data) { struct gntdev_grant_map *map = data; unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT; @@ -292,8 +291,7 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, } #ifdef CONFIG_X86 -static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token, - unsigned long addr, void *data) +static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data) { set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); return 0; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 1ff38d8036e9..2f5ce7230a43 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -731,8 +731,7 @@ struct remap_pfn { unsigned long i; }; -static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data) { struct remap_pfn *r = data; struct page *page = r->pages[r->i]; @@ -966,8 +965,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) * on a per pfn/pte basis. Mapping calls that fail with ENOENT * can be then retried until success. */ -static int is_mapped_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data) { return pte_none(*pte) ? 0 : -EBUSY; } diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index e7df65d32c91..ba883a80b3c0 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -93,8 +93,7 @@ static void setup_hparams(unsigned long gfn, void *data) info->fgfn++; } -static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int remap_pte_fn(pte_t *ptep, unsigned long addr, void *data) { struct remap_data *info = data; struct page *page = info->pages[info->index++]; diff --git a/include/linux/mm.h b/include/linux/mm.h index cb8d413d635e..bb242ad810eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2686,8 +2686,7 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) return 0; } -typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, - void *data); +typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); diff --git a/mm/memory.c b/mm/memory.c index b47e4e56448a..0428ff5ee339 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2036,7 +2036,6 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; int err; - pgtable_t token; spinlock_t *uninitialized_var(ptl); pte = (mm == &init_mm) ? @@ -2049,10 +2048,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, arch_enter_lazy_mmu_mode(); - token = pmd_pgtable(*pmd); - do { - err = fn(pte++, token, addr, data); + err = fn(pte++, addr, data); if (err) break; } while (addr += PAGE_SIZE, addr != end); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 030a544e6602..a5413a6e51fa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2996,7 +2996,7 @@ void __weak vmalloc_sync_all(void) } -static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) +static int f(pte_t *pte, unsigned long addr, void *data) { pte_t ***p = data; -- cgit v1.2.3-59-g8ed1b From ba5c5e4a5da443e80a3722e67515de5e37375b18 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jul 2019 20:59:15 -0700 Subject: arm64: move jump_label_init() before parse_early_param() While jump_label_init() was moved earlier in the boot process in efd9e03facd0 ("arm64: Use static keys for CPU features"), it wasn't early enough for early params to use it. The old state of things was as described here... init/main.c calls out to arch-specific things before general jump label and early param handling: asmlinkage __visible void __init start_kernel(void) { ... setup_arch(&command_line); ... smp_prepare_boot_cpu(); ... /* parameters may set static keys */ jump_label_init(); parse_early_param(); ... } x86 setup_arch() wants those earlier, so it handles jump label and early param: void __init setup_arch(char **cmdline_p) { ... jump_label_init(); ... parse_early_param(); ... } arm64 setup_arch() only had early param: void __init setup_arch(char **cmdline_p) { ... parse_early_param(); ... } with jump label later in smp_prepare_boot_cpu(): void __init smp_prepare_boot_cpu(void) { ... jump_label_init(); ... } This moves arm64 jump_label_init() from smp_prepare_boot_cpu() to setup_arch(), as done already on x86, in preparation from early param usage in the init_on_alloc/free() series: https://lkml.kernel.org/r/1561572949.5154.81.camel@lca.pw Link: http://lkml.kernel.org/r/201906271003.005303B52@keescook Signed-off-by: Kees Cook Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Cc: Alexander Potapenko Cc: Qian Cai Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/setup.c | 5 +++++ arch/arm64/kernel/smp.c | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7e541f947b4c..9c4bad7d7131 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -283,6 +283,11 @@ void __init setup_arch(char **cmdline_p) setup_machine_fdt(__fdt_pointer); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code and early parameters. + */ + jump_label_init(); parse_early_param(); /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9286ee6749e8..ea90d3bd9253 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -420,11 +420,6 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - /* - * Initialise the static keys early as they may be enabled by the - * cpufeature code. - */ - jump_label_init(); cpuinfo_store_boot_cpu(); /* -- cgit v1.2.3-59-g8ed1b