From 96fe805fb6fe9b2ed12fc54ad0e3e6829a4152cb Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 20 May 2016 16:59:17 -0700 Subject: mm, kasan: add a ksize() test Add a test that makes sure ksize() unpoisons the whole chunk. Signed-off-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Christoph Lameter Cc: Konstantin Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 82169fbf2453..48e5a0be655c 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -344,6 +344,25 @@ static noinline void __init kasan_stack_oob(void) *(volatile char *)p; } +static noinline void __init ksize_unpoisons_memory(void) +{ + char *ptr; + size_t size = 123, real_size = size; + + pr_info("ksize() unpoisons the whole allocated chunk\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + real_size = ksize(ptr); + /* This access doesn't trigger an error. */ + ptr[size] = 'x'; + /* This one does. */ + ptr[real_size] = 'y'; + kfree(ptr); +} + static int __init kmalloc_tests_init(void) { kmalloc_oob_right(); @@ -367,6 +386,7 @@ static int __init kmalloc_tests_init(void) kmem_cache_oob(); kasan_stack_oob(); kasan_global_oob(); + ksize_unpoisons_memory(); return -EAGAIN; } -- cgit v1.2.3-59-g8ed1b From 1771c6e1a567ea0ba2cccc0a4ffe68a1419fd8ef Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:31 -0700 Subject: x86/kasan: instrument user memory access API Exchange between user and kernel memory is coded in assembly language. Which means that such accesses won't be spotted by KASAN as a compiler instruments only C code. Add explicit KASAN checks to user memory access API to ensure that userspace writes to (or reads from) a valid kernel memory. Note: Unlike others strncpy_from_user() is written mostly in C and KASAN sees memory accesses in it. However, it makes sense to add explicit check for all @count bytes that *potentially* could be written to the kernel. [aryabinin@virtuozzo.com: move kasan check under the condition] Link: http://lkml.kernel.org/r/1462869209-21096-1-git-send-email-aryabinin@virtuozzo.com Link: http://lkml.kernel.org/r/1462538722-1574-4-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 5 +++++ arch/x86/include/asm/uaccess_64.h | 7 +++++++ lib/strncpy_from_user.c | 2 ++ 3 files changed, 14 insertions(+) (limited to 'lib') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 12f9653bde8d..2982387ba817 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include @@ -721,6 +722,8 @@ copy_from_user(void *to, const void __user *from, unsigned long n) might_fault(); + kasan_check_write(to, n); + /* * While we would like to have the compiler do the checking for us * even in the non-constant size case, any false positives there are @@ -754,6 +757,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n) { int sz = __compiletime_object_size(from); + kasan_check_read(from, n); + might_fault(); /* See the comment in copy_from_user() above. */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 307698688fa1..2eac2aa3e37f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,7 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { might_fault(); + kasan_check_write(dst, size); return __copy_from_user_nocheck(dst, src, size); } @@ -175,6 +177,7 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { might_fault(); + kasan_check_read(src, size); return __copy_to_user_nocheck(dst, src, size); } @@ -242,12 +245,14 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { + kasan_check_write(dst, size); return __copy_from_user_nocheck(dst, src, size); } static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) { + kasan_check_read(src, size); return __copy_to_user_nocheck(dst, src, size); } @@ -258,6 +263,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_fault(); + kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 1); } @@ -265,6 +271,7 @@ static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { + kasan_check_write(dst, size); return __copy_user_nocache(dst, src, size, 0); } diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 33840324138c..33f655ef48cd 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -109,6 +110,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max = max_addr - src_addr; long retval; + kasan_check_write(dst, count); user_access_begin(); retval = do_strncpy_from_user(dst, src, count, max); user_access_end(); -- cgit v1.2.3-59-g8ed1b From eae08dcab80c695c16c9f1f7dcd5b8ed52bfc88b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 20 May 2016 16:59:34 -0700 Subject: kasan/tests: add tests for user memory access functions Add some tests for the newly-added user memory access API. Link: http://lkml.kernel.org/r/1462538722-1574-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 48e5a0be655c..5e51872b3fc1 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -12,9 +12,12 @@ #define pr_fmt(fmt) "kasan test: %s " fmt, __func__ #include +#include +#include #include #include #include +#include #include static noinline void __init kmalloc_oob_right(void) @@ -363,6 +366,51 @@ static noinline void __init ksize_unpoisons_memory(void) kfree(ptr); } +static noinline void __init copy_user_test(void) +{ + char *kmem; + char __user *usermem; + size_t size = 10; + int unused; + + kmem = kmalloc(size, GFP_KERNEL); + if (!kmem) + return; + + usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (IS_ERR(usermem)) { + pr_err("Failed to allocate user memory\n"); + kfree(kmem); + return; + } + + pr_info("out-of-bounds in copy_from_user()\n"); + unused = copy_from_user(kmem, usermem, size + 1); + + pr_info("out-of-bounds in copy_to_user()\n"); + unused = copy_to_user(usermem, kmem, size + 1); + + pr_info("out-of-bounds in __copy_from_user()\n"); + unused = __copy_from_user(kmem, usermem, size + 1); + + pr_info("out-of-bounds in __copy_to_user()\n"); + unused = __copy_to_user(usermem, kmem, size + 1); + + pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); + unused = __copy_from_user_inatomic(kmem, usermem, size + 1); + + pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); + unused = __copy_to_user_inatomic(usermem, kmem, size + 1); + + pr_info("out-of-bounds in strncpy_from_user()\n"); + unused = strncpy_from_user(kmem, usermem, size + 1); + + vm_munmap((unsigned long)usermem, PAGE_SIZE); + kfree(kmem); +} + static int __init kmalloc_tests_init(void) { kmalloc_oob_right(); @@ -387,6 +435,7 @@ static int __init kmalloc_tests_init(void) kasan_stack_oob(); kasan_global_oob(); ksize_unpoisons_memory(); + copy_user_test(); return -EAGAIN; } -- cgit v1.2.3-59-g8ed1b From 42a0bb3f71383b457a7db362f1c69e7afb96732b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 20 May 2016 17:00:33 -0700 Subject: printk/nmi: generic solution for safe printk in NMI printk() takes some locks and could not be used a safe way in NMI context. The chance of a deadlock is real especially when printing stacks from all CPUs. This particular problem has been addressed on x86 by the commit a9edc8809328 ("x86/nmi: Perform a safe NMI stack trace on all CPUs"). The patchset brings two big advantages. First, it makes the NMI backtraces safe on all architectures for free. Second, it makes all NMI messages almost safe on all architectures (the temporary buffer is limited. We still should keep the number of messages in NMI context at minimum). Note that there already are several messages printed in NMI context: WARN_ON(in_nmi()), BUG_ON(in_nmi()), anything being printed out from MCE handlers. These are not easy to avoid. This patch reuses most of the code and makes it generic. It is useful for all messages and architectures that support NMI. The alternative printk_func is set when entering and is reseted when leaving NMI context. It queues IRQ work to copy the messages into the main ring buffer in a safe context. __printk_nmi_flush() copies all available messages and reset the buffer. Then we could use a simple cmpxchg operations to get synchronized with writers. There is also used a spinlock to get synchronized with other flushers. We do not longer use seq_buf because it depends on external lock. It would be hard to make all supported operations safe for a lockless use. It would be confusing and error prone to make only some operations safe. The code is put into separate printk/nmi.c as suggested by Steven Rostedt. It needs a per-CPU buffer and is compiled only on architectures that call nmi_enter(). This is achieved by the new HAVE_NMI Kconfig flag. The are MN10300 and Xtensa architectures. We need to clean up NMI handling there first. Let's do it separately. The patch is heavily based on the draft from Peter Zijlstra, see https://lkml.org/lkml/2015/6/10/327 [arnd@arndb.de: printk-nmi: use %zu format string for size_t] [akpm@linux-foundation.org: min_t->min - all types are size_t here] Signed-off-by: Petr Mladek Suggested-by: Peter Zijlstra Suggested-by: Steven Rostedt Cc: Jan Kara Acked-by: Russell King [arm part] Cc: Daniel Thompson Cc: Jiri Kosina Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: David Miller Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 + arch/arm/Kconfig | 1 + arch/arm/kernel/smp.c | 2 + arch/avr32/Kconfig | 1 + arch/blackfin/Kconfig | 1 + arch/cris/Kconfig | 1 + arch/mips/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/tile/Kconfig | 1 + arch/x86/Kconfig | 1 + arch/x86/kernel/apic/hw_nmi.c | 1 - include/linux/hardirq.h | 2 + include/linux/percpu.h | 3 - include/linux/printk.h | 12 ++- init/Kconfig | 5 + init/main.c | 1 + kernel/printk/Makefile | 1 + kernel/printk/internal.h | 44 +++++++++ kernel/printk/nmi.c | 219 ++++++++++++++++++++++++++++++++++++++++++ kernel/printk/printk.c | 19 +--- lib/nmi_backtrace.c | 89 +---------------- 24 files changed, 306 insertions(+), 107 deletions(-) create mode 100644 kernel/printk/internal.h create mode 100644 kernel/printk/nmi.c (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index 0f298f9123dc..8f84fd268dee 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -187,7 +187,11 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_NMI + bool + config HAVE_NMI_WATCHDOG + depends on HAVE_NMI bool # # An arch should select this if it provides all these things: diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 956d3575426c..90542db1220d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,6 +67,7 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index baee70267f29..df90bc59bfce 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -644,9 +644,11 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CPU_BACKTRACE: + printk_nmi_enter(); irq_enter(); nmi_cpu_backtrace(regs); irq_exit(); + printk_nmi_exit(); break; default: diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index e43519a2ca89..7e75d45e20cd 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -18,6 +18,7 @@ config AVR32 select GENERIC_CLOCKEVENTS select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA + select HAVE_NMI help AVR32 is a high-performance 32-bit RISC microprocessor core, designed for cost-sensitive embedded applications, with particular diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index a63c12259e77..28c63fea786d 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -40,6 +40,7 @@ config BLACKFIN select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select HAVE_NMI config GENERIC_CSUM def_bool y diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 5c0ca8ae9293..deba2662b9f3 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -70,6 +70,7 @@ config CRIS select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32 select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32 select HAVE_DEBUG_BUGVERBOSE if ETRAX_ARCH_V32 + select HAVE_NMI config HZ int diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 5663f411c225..8040fb1845b4 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -48,6 +48,7 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_CMOS_UPDATE select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select VIRT_TO_BUS select MODULES_USE_ELF_REL if MODULES select MODULES_USE_ELF_RELA if MODULES && 64BIT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f0403b58ae8b..01f7464d9fea 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -155,6 +155,7 @@ config PPC select NO_BOOTMEM select HAVE_GENERIC_RCU_GUP select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_NMI if PERF_EVENTS select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select ARCH_HAS_DMA_SET_COHERENT_MASK diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e2c9aaaf64b2..1c3c43d9d1b5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -166,6 +166,7 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS + select HAVE_NMI config SCHED_OMIT_FRAME_POINTER diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index cb93af8f8017..f6254341c065 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -44,6 +44,7 @@ config SUPERH select OLD_SIGSUSPEND select OLD_SIGACTION select HAVE_ARCH_AUDITSYSCALL + select HAVE_NMI help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 27b3a0ad40a0..1012f7ffcdf5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -79,6 +79,7 @@ config SPARC64 select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select HAVE_NMI config ARCH_DEFCONFIG string diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 174746225577..76989b878f3c 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -30,6 +30,7 @@ config TILE select HAVE_DEBUG_STACKOVERFLOW select ARCH_WANT_FRAME_POINTERS select HAVE_CONTEXT_TRACKING + select HAVE_NMI if USE_PMC select EDAC_SUPPORT select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ff5b3be95d4..0a7b885964ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -131,6 +131,7 @@ config X86 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS + select HAVE_NMI select HAVE_OPROFILE select HAVE_OPTPROBES select HAVE_PCSPKR_PLATFORM diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 045e424fb368..7788ce643bf4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef CONFIG_HARDLOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index dfd59d6bc6f0..c683996110b1 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,6 +61,7 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + printk_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ @@ -77,6 +78,7 @@ extern void irq_exit(void); preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ + printk_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4bc6dafb703e..56939d3f6e53 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -129,7 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -/* To avoid include hell, as printk can not declare this, we declare it here */ -DECLARE_PER_CPU(printk_func_t, printk_func); - #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9ccbdf2c1453..51dd6b824fe2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -122,7 +122,17 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) { } #endif -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); +#ifdef CONFIG_PRINTK_NMI +extern void printk_nmi_init(void); +extern void printk_nmi_enter(void); +extern void printk_nmi_exit(void); +extern void printk_nmi_flush(void); +#else +static inline void printk_nmi_init(void) { } +static inline void printk_nmi_enter(void) { } +static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_flush(void) { } +#endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) diff --git a/init/Kconfig b/init/Kconfig index 79a91a2c0444..bccc1d607be5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1454,6 +1454,11 @@ config PRINTK very difficult to diagnose system problems, saying N here is strongly discouraged. +config PRINTK_NMI + def_bool y + depends on PRINTK + depends on HAVE_NMI + config BUG bool "BUG() support" if EXPERT default y diff --git a/init/main.c b/init/main.c index 2075fafaad59..fa9b2bdde183 100644 --- a/init/main.c +++ b/init/main.c @@ -569,6 +569,7 @@ asmlinkage __visible void __init start_kernel(void) timekeeping_init(); time_init(); sched_clock_postinit(); + printk_nmi_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 85405bdcf2b3..abb0042a427b 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,2 +1,3 @@ obj-y = printk.o +obj-$(CONFIG_PRINTK_NMI) += nmi.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h new file mode 100644 index 000000000000..2de99faedfc1 --- /dev/null +++ b/kernel/printk/internal.h @@ -0,0 +1,44 @@ +/* + * internal.h - printk internal definitions + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ +#include + +typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); + +int __printf(1, 0) vprintk_default(const char *fmt, va_list args); + +#ifdef CONFIG_PRINTK_NMI + +/* + * printk() could not take logbuf_lock in NMI context. Instead, + * it temporary stores the strings into a per-CPU buffer. + * The alternative implementation is chosen transparently + * via per-CPU variable. + */ +DECLARE_PER_CPU(printk_func_t, printk_func); +static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + return this_cpu_read(printk_func)(fmt, args); +} + +#else /* CONFIG_PRINTK_NMI */ + +static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + return vprintk_default(fmt, args); +} + +#endif /* CONFIG_PRINTK_NMI */ diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c new file mode 100644 index 000000000000..303cf0d15e57 --- /dev/null +++ b/kernel/printk/nmi.c @@ -0,0 +1,219 @@ +/* + * nmi.c - Safe printk in NMI context + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/* + * printk() could not take logbuf_lock in NMI context. Instead, + * it uses an alternative implementation that temporary stores + * the strings into a per-CPU buffer. The content of the buffer + * is later flushed into the main ring buffer via IRQ work. + * + * The alternative implementation is chosen transparently + * via @printk_func per-CPU variable. + * + * The implementation allows to flush the strings also from another CPU. + * There are situations when we want to make sure that all buffers + * were handled or when IRQs are blocked. + */ +DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; +static int printk_nmi_irq_ready; + +#define NMI_LOG_BUF_LEN (4096 - sizeof(atomic_t) - sizeof(struct irq_work)) + +struct nmi_seq_buf { + atomic_t len; /* length of written data */ + struct irq_work work; /* IRQ work that flushes the buffer */ + unsigned char buffer[NMI_LOG_BUF_LEN]; +}; +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_nmi(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + int add = 0; + size_t len; + +again: + len = atomic_read(&s->len); + + if (len >= sizeof(s->buffer)) + return 0; + + /* + * Make sure that all old data have been read before the buffer was + * reseted. This is not needed when we just append data. + */ + if (!len) + smp_rmb(); + + add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args); + + /* + * Do it once again if the buffer has been flushed in the meantime. + * Note that atomic_cmpxchg() is an implicit memory barrier that + * makes sure that the data were written before updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, len + add) != len) + goto again; + + /* Get flushed in a more safe context. */ + if (add && printk_nmi_irq_ready) { + /* Make sure that IRQ work is really initialized. */ + smp_rmb(); + irq_work_queue(&s->work); + } + + return add; +} + +/* + * printk one line from the temporary buffer from @start index until + * and including the @end index. + */ +static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + +/* + * Flush data from the associated per_CPU buffer. The function + * can be called either via IRQ work or independently. + */ +static void __printk_nmi_flush(struct irq_work *work) +{ + static raw_spinlock_t read_lock = + __RAW_SPIN_LOCK_INITIALIZER(read_lock); + struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); + unsigned long flags; + size_t len, size; + int i, last_i; + + /* + * The lock has two functions. First, one reader has to flush all + * available message to make the lockless synchronization with + * writers easier. Second, we do not want to mix messages from + * different CPUs. This is especially important when printing + * a backtrace. + */ + raw_spin_lock_irqsave(&read_lock, flags); + + i = 0; +more: + len = atomic_read(&s->len); + + /* + * This is just a paranoid check that nobody has manipulated + * the buffer an unexpected way. If we printed something then + * @len must only increase. + */ + if (i && i >= len) + pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n", + i, len); + + if (!len) + goto out; /* Someone else has already flushed the buffer. */ + + /* Make sure that data has been written up to the @len */ + smp_rmb(); + + size = min(len, sizeof(s->buffer)); + last_i = i; + + /* Print line by line. */ + for (; i < size; i++) { + if (s->buffer[i] == '\n') { + print_nmi_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < size) { + print_nmi_seq_line(s, last_i, size - 1); + pr_cont("\n"); + } + + /* + * Check that nothing has got added in the meantime and truncate + * the buffer. Note that atomic_cmpxchg() is an implicit memory + * barrier that makes sure that the data were copied before + * updating s->len. + */ + if (atomic_cmpxchg(&s->len, len, 0) != len) + goto more; + +out: + raw_spin_unlock_irqrestore(&read_lock, flags); +} + +/** + * printk_nmi_flush - flush all per-cpu nmi buffers. + * + * The buffers are flushed automatically via IRQ work. This function + * is useful only when someone wants to be sure that all buffers have + * been flushed at some point. + */ +void printk_nmi_flush(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); +} + +void __init printk_nmi_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + + init_irq_work(&s->work, __printk_nmi_flush); + } + + /* Make sure that IRQ works are initialized before enabling. */ + smp_wmb(); + printk_nmi_irq_ready = 1; + + /* Flush pending messages that did not have scheduled IRQ works. */ + printk_nmi_flush(); +} + +void printk_nmi_enter(void) +{ + this_cpu_write(printk_func, vprintk_nmi); +} + +void printk_nmi_exit(void) +{ + this_cpu_write(printk_func, vprintk_default); +} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bfbf284e4218..71eba0607034 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -55,6 +55,7 @@ #include "console_cmdline.h" #include "braille.h" +#include "internal.h" int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ @@ -1807,14 +1808,6 @@ int vprintk_default(const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(vprintk_default); -/* - * This allows printk to be diverted to another function per cpu. - * This is useful for calling printk functions from within NMI - * without worrying about race conditions that can lock up the - * box. - */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; - /** * printk - print a kernel message * @fmt: format string @@ -1838,21 +1831,11 @@ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; */ asmlinkage __visible int printk(const char *fmt, ...) { - printk_func_t vprintk_func; va_list args; int r; va_start(args, fmt); - - /* - * If a caller overrides the per_cpu printk_func, then it needs - * to disable preemption when calling printk(). Otherwise - * the printk_func should be set to the default. No need to - * disable preemption here. - */ - vprintk_func = this_cpu_read(printk_func); r = vprintk_func(fmt, args); - va_end(args); return r; diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 6019c53c669e..26caf51cc238 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -16,33 +16,14 @@ #include #include #include -#include #ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -static cpumask_t printtrace_mask; - -#define NMI_BUF_SIZE 4096 - -struct nmi_seq_buf { - unsigned char buffer[NMI_BUF_SIZE]; - struct seq_buf seq; -}; - -/* Safe printing in NMI context */ -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -static void print_seq_line(struct nmi_seq_buf *s, int start, int end) -{ - const char *buf = s->buffer + start; - - printk("%.*s", (end - start) + 1, buf); -} - /* * When raise() is called it will be is passed a pointer to the * backtrace_mask. Architectures that call nmi_cpu_backtrace() @@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end) void nmi_trigger_all_cpu_backtrace(bool include_self, void (*raise)(cpumask_t *mask)) { - struct nmi_seq_buf *s; - int i, cpu, this_cpu = get_cpu(); + int i, this_cpu = get_cpu(); if (test_and_set_bit(0, &backtrace_flag)) { /* @@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, if (!include_self) cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); - cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); - - /* - * Set up per_cpu seq_buf buffers that the NMIs running on the other - * CPUs will write to. - */ - for_each_cpu(cpu, to_cpumask(backtrace_mask)) { - s = &per_cpu(nmi_print_seq, cpu); - seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); - } - if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("Sending NMI to %s CPUs:\n", (include_self ? "all" : "other")); @@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self, } /* - * Now that all the NMIs have triggered, we can dump out their - * back traces safely to the console. + * Force flush any remote buffers that might be stuck in IRQ context + * and therefore could not run their irq_work. */ - for_each_cpu(cpu, &printtrace_mask) { - int len, last_i = 0; + printk_nmi_flush(); - s = &per_cpu(nmi_print_seq, cpu); - len = seq_buf_used(&s->seq); - if (!len) - continue; - - /* Print line by line. */ - for (i = 0; i < len; i++) { - if (s->buffer[i] == '\n') { - print_seq_line(s, last_i, i); - last_i = i + 1; - } - } - /* Check if there was a partial line. */ - if (last_i < len) { - print_seq_line(s, last_i, len - 1); - pr_cont("\n"); - } - } - - clear_bit(0, &backtrace_flag); - smp_mb__after_atomic(); + clear_bit_unlock(0, &backtrace_flag); put_cpu(); } -/* - * It is not safe to call printk() directly from NMI handlers. - * It may be fine if the NMI detected a lock up and we have no choice - * but to do so, but doing a NMI on all other CPUs to get a back trace - * can be done with a sysrq-l. We don't want that to lock up, which - * can happen if the NMI interrupts a printk in progress. - * - * Instead, we redirect the vprintk() to this nmi_vprintk() that writes - * the content into a per cpu seq_buf buffer. Then when the NMIs are - * all done, we can safely dump the contents of the seq_buf to a printk() - * from a non NMI context. - */ -static int nmi_vprintk(const char *fmt, va_list args) -{ - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - unsigned int len = seq_buf_used(&s->seq); - - seq_buf_vprintf(&s->seq, fmt, args); - return seq_buf_used(&s->seq) - len; -} - bool nmi_cpu_backtrace(struct pt_regs *regs) { int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - printk_func_t printk_func_save = this_cpu_read(printk_func); - - /* Replace printk to write into the NMI seq */ - this_cpu_write(printk_func, nmi_vprintk); pr_warn("NMI backtrace for cpu %d\n", cpu); if (regs) show_regs(regs); else dump_stack(); - this_cpu_write(printk_func, printk_func_save); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- cgit v1.2.3-59-g8ed1b From aa4ea1c3b3948d325a6826adf9c367d11fa1ab74 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:00:54 -0700 Subject: lib/vsprintf: simplify UUID printing There are few functions here and there along with type definitions that provide UUID API. This series consolidates everything under one hood and converts current users. This has been tested for a while internally, however it doesn't mean we covered all possible cases (especially accuracy of UUID constants after conversion). So, please test this as much as you can and provide your tag. We appreciate the effort. The ACPI conversion is postponed for now to sort more generic things out first. This patch (of 9): Since we have hex_byte_pack_upper() we may use it directly and avoid second loop. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ccb664b54280..be0e7cf11e48 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1324,7 +1324,10 @@ char *uuid_string(char *buf, char *end, const u8 *addr, } for (i = 0; i < 16; i++) { - p = hex_byte_pack(p, addr[index[i]]); + if (uc) + p = hex_byte_pack_upper(p, addr[index[i]]); + else + p = hex_byte_pack(p, addr[index[i]]); switch (i) { case 3: case 5: @@ -1337,13 +1340,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr, *p = 0; - if (uc) { - p = uuid; - do { - *p = toupper(*p); - } while (*(++p)); - } - return string(buf, end, uuid, spec); } -- cgit v1.2.3-59-g8ed1b From 8da4b8c48e7b43cb16d05e1dbb34ad9f73ab7efd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:00 -0700 Subject: lib/uuid.c: move generate_random_uuid() to uuid.c Let's gather the UUID related functions under one hood. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/random.c | 21 +-------------------- fs/btrfs/volumes.c | 2 +- fs/ext4/ioctl.c | 2 +- fs/f2fs/file.c | 2 +- fs/reiserfs/objectid.c | 2 +- fs/ubifs/sb.c | 2 +- include/linux/random.h | 1 - include/linux/uuid.h | 2 ++ lib/uuid.c | 20 ++++++++++++++++++++ 9 files changed, 28 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/drivers/char/random.c b/drivers/char/random.c index b583e5336630..0158d3bff7e5 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -260,6 +260,7 @@ #include #include #include +#include #include #include @@ -1621,26 +1622,6 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, return urandom_read(NULL, buf, count, NULL); } -/*************************************************************** - * Random UUID interface - * - * Used here for a Boot ID, but can be useful for other kernel - * drivers. - ***************************************************************/ - -/* - * Generate random UUID - */ -void generate_random_uuid(unsigned char uuid_out[16]) -{ - get_random_bytes(uuid_out, 16); - /* Set UUID version to 4 --- truly random generation */ - uuid_out[6] = (uuid_out[6] & 0x0F) | 0x40; - /* Set the UUID variant to DCE */ - uuid_out[8] = (uuid_out[8] & 0x3F) | 0x80; -} -EXPORT_SYMBOL(generate_random_uuid); - /******************************************************************** * * Sysctl interface diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bd0f45fb38c4..bfb80da3e6eb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -20,13 +20,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include "ctree.h" #include "extent_map.h" diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eae5917c534e..7497f50cb293 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include "ext4_jbd2.h" #include "ext4.h" diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index eb9d027e5981..c6b14951bef3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index 99a5d5dae46a..415d66ca87d1 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c @@ -3,8 +3,8 @@ */ #include -#include #include +#include #include "reiserfs.h" /* find where objectid map starts */ diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index f4fbc7b6b794..3cbb904a6d7d 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -28,8 +28,8 @@ #include "ubifs.h" #include -#include #include +#include /* * Default journal size in logical eraseblocks as a percent of total diff --git a/include/linux/random.h b/include/linux/random.h index 9c29122037f9..e47e533742b5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -26,7 +26,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -void generate_random_uuid(unsigned char uuid_out[16]); extern int random_int_secret_init(void); #ifndef MODULE diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 6df2509033d7..91c2b6d9cbb7 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -33,6 +33,8 @@ static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) return memcmp(&u1, &u2, sizeof(uuid_be)); } +void generate_random_uuid(unsigned char uuid[16]); + extern void uuid_le_gen(uuid_le *u); extern void uuid_be_gen(uuid_be *u); diff --git a/lib/uuid.c b/lib/uuid.c index 398821e4dce1..6c81c0b0467e 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -23,6 +23,26 @@ #include #include +/*************************************************************** + * Random UUID interface + * + * Used here for a Boot ID, but can be useful for other kernel + * drivers. + ***************************************************************/ + +/* + * Generate random UUID + */ +void generate_random_uuid(unsigned char uuid[16]) +{ + get_random_bytes(uuid, 16); + /* Set UUID version to 4 --- truly random generation */ + uuid[6] = (uuid[6] & 0x0F) | 0x40; + /* Set the UUID variant to DCE */ + uuid[8] = (uuid[8] & 0x3F) | 0x80; +} +EXPORT_SYMBOL(generate_random_uuid); + static void __uuid_gen_common(__u8 b[16]) { prandom_bytes(b, 16); -- cgit v1.2.3-59-g8ed1b From 2b1b0d66704a8cafe83be7114ec4c15ab3a314ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:04 -0700 Subject: lib/uuid.c: introduce a few more generic helpers There are new helpers in this patch: uuid_is_valid checks if a UUID is valid uuid_be_to_bin converts from string to binary (big endian) uuid_le_to_bin converts from string to binary (little endian) They will be used in future, i.e. in the following patches in the series. This also moves the indices arrays to lib/uuid.c to be shared accross modules. [andriy.shevchenko@linux.intel.com: fix typo] Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uuid.h | 13 +++++++++++ lib/uuid.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/vsprintf.c | 9 ++++---- 3 files changed, 82 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 91c2b6d9cbb7..e0b95e728a77 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -22,6 +22,11 @@ #include +/* + * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") + * not including trailing NUL. + */ +#define UUID_STRING_LEN 36 static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) { @@ -38,4 +43,12 @@ void generate_random_uuid(unsigned char uuid[16]); extern void uuid_le_gen(uuid_le *u); extern void uuid_be_gen(uuid_be *u); +bool __must_check uuid_is_valid(const char *uuid); + +extern const u8 uuid_le_index[16]; +extern const u8 uuid_be_index[16]; + +int uuid_le_to_bin(const char *uuid, uuid_le *u); +int uuid_be_to_bin(const char *uuid, uuid_be *u); + #endif diff --git a/lib/uuid.c b/lib/uuid.c index 6c81c0b0467e..82787f652fbc 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -19,10 +19,17 @@ */ #include +#include +#include #include #include #include +const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(uuid_le_index); +const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(uuid_be_index); + /*************************************************************** * Random UUID interface * @@ -65,3 +72,61 @@ void uuid_be_gen(uuid_be *bu) bu->b[6] = (bu->b[6] & 0x0F) | 0x40; } EXPORT_SYMBOL_GPL(uuid_be_gen); + +/** + * uuid_is_valid - checks if UUID string valid + * @uuid: UUID string to check + * + * Description: + * It checks if the UUID string is following the format: + * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + * where x is a hex digit. + * + * Return: true if input is valid UUID string. + */ +bool uuid_is_valid(const char *uuid) +{ + unsigned int i; + + for (i = 0; i < UUID_STRING_LEN; i++) { + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (uuid[i] != '-') + return false; + } else if (!isxdigit(uuid[i])) { + return false; + } + } + + return true; +} +EXPORT_SYMBOL(uuid_is_valid); + +static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16]) +{ + static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34}; + unsigned int i; + + if (!uuid_is_valid(uuid)) + return -EINVAL; + + for (i = 0; i < 16; i++) { + int hi = hex_to_bin(uuid[si[i]] + 0); + int lo = hex_to_bin(uuid[si[i]] + 1); + + b[ei[i]] = (hi << 4) | lo; + } + + return 0; +} + +int uuid_le_to_bin(const char *uuid, uuid_le *u) +{ + return __uuid_to_bin(uuid, u->b, uuid_le_index); +} +EXPORT_SYMBOL(uuid_le_to_bin); + +int uuid_be_to_bin(const char *uuid, uuid_be *u) +{ + return __uuid_to_bin(uuid, u->b, uuid_be_index); +} +EXPORT_SYMBOL(uuid_be_to_bin); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index be0e7cf11e48..0967771d8f7f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #ifdef CONFIG_BLOCK #include @@ -1304,19 +1305,17 @@ static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, struct printf_spec spec, const char *fmt) { - char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char uuid[UUID_STRING_LEN + 1]; char *p = uuid; int i; - static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; - const u8 *index = be; + const u8 *index = uuid_be_index; bool uc = false; switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ case 'l': - index = le; + index = uuid_le_index; break; case 'B': uc = true; -- cgit v1.2.3-59-g8ed1b From e3a93bce69ad3e2c38927abe311b8cb4f17abbaf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 May 2016 17:01:07 -0700 Subject: lib/uuid.c: remove FSF address There is no point in keeping an address in the file since it's subject to change. While here, update Intel Copyright years. Signed-off-by: Andy Shevchenko Reviewed-by: Matt Fleming Cc: Dmitry Kasatkin Cc: Mimi Zohar Cc: Rasmus Villemoes Cc: Arnd Bergmann Cc: "Theodore Ts'o" Cc: Al Viro Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uuid.h | 6 +----- include/uapi/linux/uuid.h | 4 ---- lib/uuid.c | 6 +----- 3 files changed, 2 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index e0b95e728a77..2d095fc60204 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -1,7 +1,7 @@ /* * UUID/GUID definition * - * Copyright (C) 2010, Intel Corp. + * Copyright (C) 2010, 2016 Intel Corp. * Huang Ying * * This program is free software; you can redistribute it and/or @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _LINUX_UUID_H_ #define _LINUX_UUID_H_ diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 786f0773cc33..3738e5fb6a4d 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _UAPI_LINUX_UUID_H_ diff --git a/lib/uuid.c b/lib/uuid.c index 82787f652fbc..e116ae5fa00f 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -1,7 +1,7 @@ /* * Unified UUID/GUID definition * - * Copyright (C) 2009, Intel Corp. + * Copyright (C) 2009, 2016 Intel Corp. * Huang Ying * * This program is free software; you can redistribute it and/or @@ -12,10 +12,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -- cgit v1.2.3-59-g8ed1b From 57578c2ea2cb2e0d362a9212ac83cf90221d4883 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:54 -0700 Subject: raxix-tree: introduce CONFIG_RADIX_TREE_MULTIORDER I've been receiving increasingly concerned notes from 0day about how much my recent changes have been bloating the radix tree. Make it happier by only including multiorder support if CONFIG_TRANSPARENT_HUGEPAGES is set. This is an independent Kconfig option, so other radix tree users can also set it if they have a need. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 +++ lib/radix-tree.c | 26 ++++++++++++++++++-------- mm/Kconfig | 1 + tools/testing/radix-tree/linux/kernel.h | 1 + 4 files changed, 23 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 61d55bd0ed89..d79909dc01ec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -362,6 +362,9 @@ config INTERVAL_TREE for more information. +config RADIX_TREE_MULTIORDER + bool + config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1624c4117961..799f341977d0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -484,6 +484,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = node->slots[offset]; } +#ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ if ((shift - order) > 0) { int i, n = 1 << (shift - order); @@ -499,6 +500,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, node->count++; } } +#endif if (nodep) *nodep = node; @@ -1469,6 +1471,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, return deleted; } +static inline void delete_sibling_entries(struct radix_tree_node *node, + void *ptr, unsigned offset) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + int i; + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + node->slots[offset + i] = NULL; + node->count--; + } +#endif +} + /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root @@ -1484,7 +1500,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset, i; + unsigned int offset; void **slot; void *entry; int tag; @@ -1513,13 +1529,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } - /* Delete any sibling slots pointing to this slot */ - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr_to_indirect(slot)) - break; - node->slots[offset + i] = NULL; - node->count--; - } + delete_sibling_entries(node, ptr_to_indirect(slot), offset); node->slots[offset] = NULL; node->count--; diff --git a/mm/Kconfig b/mm/Kconfig index 1a6a28ebcb8b..2664c118b5d2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -404,6 +404,7 @@ config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE select COMPACTION + select RADIX_TREE_MULTIORDER help Transparent Hugepages allows the kernel to use huge pages and huge tlb transparently to the applications whenever possible. diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 31fe2c77d7ae..8ea0ed450810 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -9,6 +9,7 @@ #include "../../include/linux/compiler.h" +#define CONFIG_RADIX_TREE_MULTIORDER #define CONFIG_SHMEM #define CONFIG_SWAP -- cgit v1.2.3-59-g8ed1b From db050f2924fcf39428bdadf28970a32cfaf256ef Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:57 -0700 Subject: radix-tree: add missing sibling entry functionality The code I previously added to enable multiorder radix tree entries was untested and therefore buggy. This commit adds the support functions that Ross and I decided were necessary over a four-week period of iterating various designs. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 799f341977d0..585965afc808 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -80,6 +80,46 @@ static inline void *indirect_to_ptr(void *ptr) return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#ifdef CONFIG_RADIX_TREE_MULTIORDER +/* Sibling slots point directly to another slot in the same node */ +static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +{ + void **ptr = node; + return (parent->slots <= ptr) && + (ptr < parent->slots + RADIX_TREE_MAP_SIZE); +} +#else +static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +{ + return false; +} +#endif + +static inline unsigned long get_slot_offset(struct radix_tree_node *parent, + void **slot) +{ + return slot - parent->slots; +} + +static unsigned radix_tree_descend(struct radix_tree_node *parent, + struct radix_tree_node **nodep, unsigned offset) +{ + void **entry = rcu_dereference_raw(parent->slots[offset]); + +#ifdef CONFIG_RADIX_TREE_MULTIORDER + if (radix_tree_is_indirect_ptr(entry)) { + unsigned long siboff = get_slot_offset(parent, entry); + if (siboff < RADIX_TREE_MAP_SIZE) { + offset = siboff; + entry = rcu_dereference_raw(parent->slots[offset]); + } + } +#endif + + *nodep = (void *)entry; + return offset; +} + static inline gfp_t root_gfp_mask(struct radix_tree_root *root) { return root->gfp_mask & __GFP_BITS_MASK; -- cgit v1.2.3-59-g8ed1b From 3b8c00f68405e9c037a6d8ae0d5d9da7f8a34e6a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:59 -0700 Subject: radix-tree: fix sibling entry insertion The subtraction was the wrong way round, leading to undefined behaviour (shift by an amount larger than the size of the type). Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 585965afc808..c0366d1d2613 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -526,8 +526,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ - if ((shift - order) > 0) { - int i, n = 1 << (shift - order); + if (order > shift) { + int i, n = 1 << (order - shift); offset = offset & ~(n - 1); slot = ptr_to_indirect(&node->slots[offset]); for (i = 0; i < n; i++) { -- cgit v1.2.3-59-g8ed1b From 29e0967c2f66771f654cef7168c90a53737abcdf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:02 -0700 Subject: radix-tree: fix deleting a multi-order entry through an alias If we deleted an entry through an index which looked up a sibling pointer, we'd end up zeroing out the wrong slots in the node. Use get_slot_offset() to find the right slot. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c0366d1d2613..b3364b9ecc83 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1558,7 +1558,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, return entry; } - offset = index & RADIX_TREE_MAP_MASK; + offset = get_slot_offset(node, slot); /* * Clear all tags associated with the item to be deleted. -- cgit v1.2.3-59-g8ed1b From aa5475760235672f316fbf29cdfb82a75016dbdf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:05 -0700 Subject: radix-tree: remove restriction on multi-order entries Now that sibling pointers are handled explicitly, there is no purpose served by restricting the order to be >= RADIX_TREE_MAP_SHIFT. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b3364b9ecc83..6900f7b67c49 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -483,8 +483,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, unsigned int height, shift, offset; int error; - BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT)); - /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index, order); -- cgit v1.2.3-59-g8ed1b From 1456a439fc2dcc0c3d1a2d7af1fd83962813aaee Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:08 -0700 Subject: radix-tree: introduce radix_tree_load_root() All the tree walking functions start with some variant of this code; centralise it in one place so we're not chasing subtly different bugs everywhere. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6900f7b67c49..272ce810d29b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -405,6 +405,29 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) return height_to_maxindex[height]; } +static inline unsigned long node_maxindex(struct radix_tree_node *node) +{ + return radix_tree_maxindex(node->path & RADIX_TREE_HEIGHT_MASK); +} + +static unsigned radix_tree_load_root(struct radix_tree_root *root, + struct radix_tree_node **nodep, unsigned long *maxindex) +{ + struct radix_tree_node *node = rcu_dereference_raw(root->rnode); + + *nodep = node; + + if (likely(radix_tree_is_indirect_ptr(node))) { + node = indirect_to_ptr(node); + *maxindex = node_maxindex(node); + return (node->path & RADIX_TREE_HEIGHT_MASK) * + RADIX_TREE_MAP_SHIFT; + } + + *maxindex = 0; + return 0; +} + /* * Extend a radix tree so it can store key @index. */ -- cgit v1.2.3-59-g8ed1b From 49ea6ebcd3080ebf2c589b5f1437dd8bb2f90395 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:11 -0700 Subject: radix-tree: fix extending the tree for multi-order entries at offset 0 The current code will insert entries at each level, as if we're going to add a new entry at the bottom level, so we then get an -EEXIST when we try to insert the entry into the tree. The best way to fix this is to not check 'order' when inserting into an empty tree. We still need to 'extend' the tree to the height necessary for the maximum index corresponding to this entry, so pass that value to radix_tree_extend() rather than the index we're asked to create, or we won't create a tree that's deep enough. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 272ce810d29b..f13ddbba8ace 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -432,7 +432,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, * Extend a radix tree so it can store key @index. */ static int radix_tree_extend(struct radix_tree_root *root, - unsigned long index, unsigned order) + unsigned long index) { struct radix_tree_node *node; struct radix_tree_node *slot; @@ -444,7 +444,7 @@ static int radix_tree_extend(struct radix_tree_root *root, while (index > radix_tree_maxindex(height)) height++; - if ((root->rnode == NULL) && (order == 0)) { + if (root->rnode == NULL) { root->height = height; goto out; } @@ -467,7 +467,7 @@ static int radix_tree_extend(struct radix_tree_root *root, node->count = 1; node->parent = NULL; slot = root->rnode; - if (radix_tree_is_indirect_ptr(slot) && newheight > 1) { + if (radix_tree_is_indirect_ptr(slot)) { slot = indirect_to_ptr(slot); slot->parent = node; slot = ptr_to_indirect(slot); @@ -478,7 +478,7 @@ static int radix_tree_extend(struct radix_tree_root *root, root->height = newheight; } while (height > root->height); out: - return 0; + return height * RADIX_TREE_MAP_SHIFT; } /** @@ -503,20 +503,26 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, void ***slotp) { struct radix_tree_node *node = NULL, *slot; + unsigned long maxindex; unsigned int height, shift, offset; - int error; + unsigned long max = index | ((1UL << order) - 1); + + shift = radix_tree_load_root(root, &slot, &maxindex); /* Make sure the tree is high enough. */ - if (index > radix_tree_maxindex(root->height)) { - error = radix_tree_extend(root, index, order); - if (error) + if (max > maxindex) { + int error = radix_tree_extend(root, max); + if (error < 0) return error; + shift = error; + slot = root->rnode; + if (order == shift) { + shift += RADIX_TREE_MAP_SHIFT; + root->height++; + } } - slot = root->rnode; - height = root->height; - shift = height * RADIX_TREE_MAP_SHIFT; offset = 0; /* uninitialised var warning */ while (shift > order) { -- cgit v1.2.3-59-g8ed1b From afe0e395b6d1817fa5393f1ad6fcbf71406b016d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:17 -0700 Subject: radix-tree: fix several shrinking bugs with multiorder entries Setting the indirect bit on the user data entry used to be unambiguous because the tree walking code knew not to expect internal nodes in the last level of the tree. Multiorder entries can appear at any level of the tree, and a leaf with the indirect bit set is indistinguishable from a pointer to a node. Introduce a special entry (RADIX_TREE_RETRY) which is neither a valid user entry, nor a valid pointer to a node. The radix_tree_deref_retry() function continues to work the same way, but tree walking code can distinguish it from a pointer to a node. Also fix the condition for setting slot->parent to NULL; it does not matter what height the tree is, it only matters whether slot is an indirect pointer. Move this code above the comment which is referring to the assignment to root->rnode. Also fix the condition for preventing the tree from shrinking to a single entry if it's a multiorder entry. Add a test-case to the test suite that checks that the tree goes back down to its original height after an item is inserted & deleted from a higher index in the tree. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 23 +++++++++++---------- tools/testing/radix-tree/multiorder.c | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f13ddbba8ace..a1ba41730071 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -80,6 +80,8 @@ static inline void *indirect_to_ptr(void *ptr) return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define RADIX_TREE_RETRY ptr_to_indirect(NULL) + #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Sibling slots point directly to another slot in the same node */ static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) @@ -1443,6 +1445,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; + if (!radix_tree_is_indirect_ptr(slot) && (root->height > 1)) + break; + + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + slot->parent = NULL; + slot = ptr_to_indirect(slot); + } /* * We don't need rcu_assign_pointer(), since we are simply @@ -1451,14 +1461,6 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - if (root->height > 1) { - if (!radix_tree_is_indirect_ptr(slot)) - break; - - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = ptr_to_indirect(slot); - } root->rnode = slot; root->height--; @@ -1480,9 +1482,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (root->height == 0) - *((unsigned long *)&to_free->slots[0]) |= - RADIX_TREE_INDIRECT_PTR; + if (!radix_tree_is_indirect_ptr(slot)) + to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); } diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index cfe718c78eb6..71f34a047002 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -46,6 +46,41 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_shrink(unsigned long index, int order) +{ + unsigned long i; + unsigned long max = 1 << order; + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + + printf("Multiorder shrink index %ld, order %d\n", index, order); + + assert(item_insert_order(&tree, 0, order) == 0); + + node = tree.rnode; + + assert(item_insert(&tree, index) == 0); + assert(node != tree.rnode); + + assert(item_delete(&tree, index) != 0); + assert(node == tree.rnode); + + for (i = 0; i < max; i++) { + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == 0); + } + for (i = max; i < 2*max; i++) + item_check_absent(&tree, i); + + if (!item_delete(&tree, 0)) { + printf("failed to delete index %ld (order %d)\n", index, order); abort(); + } + + for (i = 0; i < 2*max; i++) + item_check_absent(&tree, i); +} + void multiorder_checks(void) { int i; @@ -55,4 +90,8 @@ void multiorder_checks(void) multiorder_check(0, i); multiorder_check((1UL << i) + 1, i); } + + for (i = 0; i < 15; i++) + multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + } -- cgit v1.2.3-59-g8ed1b From 858299544efcf2577511bb018b9e73637ac71a2f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:20 -0700 Subject: radix-tree: rewrite __radix_tree_lookup Use the new multi-order support functions to rewrite __radix_tree_lookup() Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 48 ++++++++++++++++-------------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a1ba41730071..f14ada9830ca 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -634,44 +634,28 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void ***slotp) { struct radix_tree_node *node, *parent; - unsigned int height, shift; + unsigned long maxindex; + unsigned int shift; void **slot; - node = rcu_dereference_raw(root->rnode); - if (node == NULL) - return NULL; - - if (!radix_tree_is_indirect_ptr(node)) { - if (index > 0) - return NULL; - - if (nodep) - *nodep = NULL; - if (slotp) - *slotp = (void **)&root->rnode; - return node; - } - node = indirect_to_ptr(node); - - height = node->path & RADIX_TREE_HEIGHT_MASK; - if (index > radix_tree_maxindex(height)) + restart: + parent = NULL; + slot = (void **)&root->rnode; + shift = radix_tree_load_root(root, &node, &maxindex); + if (index > maxindex) return NULL; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - - do { - parent = node; - slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK); - node = rcu_dereference_raw(*slot); - if (node == NULL) - return NULL; - if (!radix_tree_is_indirect_ptr(node)) - break; - node = indirect_to_ptr(node); + while (radix_tree_is_indirect_ptr(node)) { + unsigned offset; + if (node == RADIX_TREE_RETRY) + goto restart; + parent = indirect_to_ptr(node); shift -= RADIX_TREE_MAP_SHIFT; - height--; - } while (height > 0); + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(parent, &node, offset); + slot = parent->slots + offset; + } if (nodep) *nodep = parent; -- cgit v1.2.3-59-g8ed1b From 7b60e9ad59a31dd98c2f7ef841e2882c2b0e0f3b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:23 -0700 Subject: radix-tree: fix multiorder BUG_ON in radix_tree_insert These BUG_ON tests are to ensure that all the tags are clear when inserting a new entry. If we insert a multiorder entry, we'll end up looking at the tags for a different node, and so the BUG_ON can end up triggering spuriously. Also, we now have three tags, not two, so check all three are clear, and check all the root tags with a single call to BUG_ON since the bits are stored contiguously. Include a test-case to ensure this problem does not reoccur. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 14 ++++++++++---- tools/testing/radix-tree/multiorder.c | 12 ++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f14ada9830ca..ff460423ff4b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -165,6 +165,11 @@ static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); } +static inline unsigned root_tags_get(struct radix_tree_root *root) +{ + return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; +} + /* * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. @@ -604,12 +609,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(*slot, item); if (node) { + unsigned offset = get_slot_offset(node, slot); node->count++; - BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); - BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 2, offset)); } else { - BUG_ON(root_tag_get(root, 0)); - BUG_ON(root_tag_get(root, 1)); + BUG_ON(root_tags_get(root)); } return 0; diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 71f34a047002..0a311a5f39de 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -81,6 +81,17 @@ static void multiorder_shrink(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_insert_bug(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + radix_tree_tag_set(&tree, 0, 0); + item_insert_order(&tree, 3 << 6, 6); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -94,4 +105,5 @@ void multiorder_checks(void) for (i = 0; i < 15; i++) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + multiorder_insert_bug(); } -- cgit v1.2.3-59-g8ed1b From 21ef533931f73a8e963a6107aa5ec51b192f28be Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:26 -0700 Subject: radix-tree: add support for multi-order iterating This enables the macros radix_tree_for_each_slot() and friends to be used with multi-order entries. The way that this works is that we treat all entries in a given slots[] array as a single chunk. If the index given to radix_tree_next_chunk() happens to point us to a sibling entry, we will back up iter->index so that it points to the canonical entry, and that will be the place where we start our iteration. As we're processing a chunk in radix_tree_next_slot(), we process canonical entries, skip over sibling entries, and restart the chunk lookup if we find a non-sibling indirect pointer. This drops back to the radix_tree_next_chunk() code, which will re-walk the tree and look for another chunk. This allows us to properly handle multi-order entries mixed with other entries that are at various heights in the radix tree. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 69 +++++++++++++++++++++++---- lib/radix-tree.c | 66 ++++++++++++++----------- tools/testing/radix-tree/generated/autoconf.h | 3 ++ tools/testing/radix-tree/linux/kernel.h | 5 +- 4 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 tools/testing/radix-tree/generated/autoconf.h (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index e1512a607709..8558d52e1c7b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -330,8 +330,9 @@ static inline void radix_tree_preload_end(void) * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot - * @next_index: next-to-last index for this chunk + * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating + * @shift: shift for the node that holds our slots * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is @@ -344,8 +345,20 @@ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + unsigned int shift; +#endif }; +static inline unsigned int iter_shift(struct radix_tree_iter *iter) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + return iter->shift; +#else + return 0; +#endif +} + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ @@ -405,6 +418,12 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) return NULL; } +static inline unsigned long +__radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) +{ + return iter->index + (slots << iter_shift(iter)); +} + /** * radix_tree_iter_next - resume iterating when the chunk may be invalid * @iter: iterator state @@ -416,7 +435,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) static inline __must_check void **radix_tree_iter_next(struct radix_tree_iter *iter) { - iter->next_index = iter->index + 1; + iter->next_index = __radix_tree_iter_add(iter, 1); iter->tags = 0; return NULL; } @@ -430,7 +449,12 @@ void **radix_tree_iter_next(struct radix_tree_iter *iter) static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { - return iter->next_index - iter->index; + return (iter->next_index - iter->index) >> iter_shift(iter); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } /** @@ -448,24 +472,51 @@ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { + void *canon = slot; + iter->tags >>= 1; + if (unlikely(!iter->tags)) + return NULL; + while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(slot[1])) { + if (indirect_to_ptr(slot[1]) == canon) { + iter->tags >>= 1; + iter->index = __radix_tree_iter_add(iter, 1); + slot++; + continue; + } + iter->next_index = __radix_tree_iter_add(iter, 1); + return NULL; + } if (likely(iter->tags & 1ul)) { - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); return slot + 1; } - if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) { + if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset; - iter->index += offset + 1; + iter->index = __radix_tree_iter_add(iter, offset + 1); return slot + offset + 1; } } else { - long size = radix_tree_chunk_size(iter); + long count = radix_tree_chunk_size(iter); + void *canon = slot; - while (--size > 0) { + while (--count > 0) { slot++; - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); + + if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(*slot)) { + if (indirect_to_ptr(*slot) == canon) + continue; + else { + iter->next_index = iter->index; + break; + } + } + if (likely(*slot)) return slot; if (flags & RADIX_TREE_ITER_CONTIG) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff460423ff4b..a4da86e40def 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -75,11 +75,6 @@ static inline void *ptr_to_indirect(void *ptr) return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); } -static inline void *indirect_to_ptr(void *ptr) -{ - return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); -} - #define RADIX_TREE_RETRY ptr_to_indirect(NULL) #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -885,6 +880,14 @@ int radix_tree_tag_get(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_get); +static inline void __set_iter_shift(struct radix_tree_iter *iter, + unsigned int shift) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + iter->shift = shift; +#endif +} + /** * radix_tree_next_chunk - find next chunk of slots for iteration * @@ -898,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset, height; + unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -916,33 +919,39 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!index && iter->index) return NULL; - rnode = rcu_dereference_raw(root->rnode); + restart: + shift = radix_tree_load_root(root, &rnode, &maxindex); + if (index > maxindex) + return NULL; + if (radix_tree_is_indirect_ptr(rnode)) { rnode = indirect_to_ptr(rnode); - } else if (rnode && !index) { + } else if (rnode) { /* Single-slot tree */ - iter->index = 0; - iter->next_index = 1; + iter->index = index; + iter->next_index = maxindex + 1; iter->tags = 1; + __set_iter_shift(iter, shift); return (void **)&root->rnode; } else return NULL; -restart: - height = rnode->path & RADIX_TREE_HEIGHT_MASK; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + shift -= RADIX_TREE_MAP_SHIFT; offset = index >> shift; - /* Index outside of the tree */ - if (offset >= RADIX_TREE_MAP_SIZE) - return NULL; - node = rnode; while (1) { struct radix_tree_node *slot; + unsigned new_off = radix_tree_descend(node, &slot, offset); + + if (new_off < offset) { + offset = new_off; + index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index |= offset << shift; + } + if ((flags & RADIX_TREE_ITER_TAGGED) ? - !test_bit(offset, node->tags[tag]) : - !node->slots[offset]) { + !tag_get(node, tag, offset) : !slot) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -954,7 +963,10 @@ restart: offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { - if (node->slots[offset]) + void *slot = node->slots[offset]; + if (is_sibling_entry(node, slot)) + continue; + if (slot) break; } index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); @@ -964,25 +976,23 @@ restart: return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; + slot = rcu_dereference_raw(node->slots[offset]); } - /* This is leaf-node */ - if (!shift) - break; - - slot = rcu_dereference_raw(node->slots[offset]); - if (slot == NULL) + if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; if (!radix_tree_is_indirect_ptr(slot)) break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } /* Update the iterator state */ - iter->index = index; - iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1; + iter->index = index & ~((1 << shift) - 1); + iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ if (flags & RADIX_TREE_ITER_TAGGED) { diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h new file mode 100644 index 000000000000..ad18cf5a2a3a --- /dev/null +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -0,0 +1,3 @@ +#define CONFIG_RADIX_TREE_MULTIORDER 1 +#define CONFIG_SHMEM 1 +#define CONFIG_SWAP 1 diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 8ea0ed450810..be98a47b4e1b 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -8,10 +8,7 @@ #include #include "../../include/linux/compiler.h" - -#define CONFIG_RADIX_TREE_MULTIORDER -#define CONFIG_SHMEM -#define CONFIG_SWAP +#include "../../../include/linux/kconfig.h" #define RADIX_TREE_MAP_SHIFT 3 -- cgit v1.2.3-59-g8ed1b From fb969909dd18930ad68d8f8b8e7895816cf74b0a Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:32 -0700 Subject: radix-tree: rewrite radix_tree_tag_set Use the new multi-order support functions to rewrite radix_tree_tag_set() Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a4da86e40def..5234a95024a3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -722,35 +722,32 @@ EXPORT_SYMBOL(radix_tree_lookup); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag) { - unsigned int height, shift; - struct radix_tree_node *slot; - - height = root->height; - BUG_ON(index > radix_tree_maxindex(height)); + struct radix_tree_node *node, *parent; + unsigned long maxindex; + unsigned int shift; - slot = indirect_to_ptr(root->rnode); - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + shift = radix_tree_load_root(root, &node, &maxindex); + BUG_ON(index > maxindex); - while (height > 0) { - int offset; + while (radix_tree_is_indirect_ptr(node)) { + unsigned offset; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!tag_get(slot, tag, offset)) - tag_set(slot, tag, offset); - slot = slot->slots[offset]; - BUG_ON(slot == NULL); - if (!radix_tree_is_indirect_ptr(slot)) - break; - slot = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; - height--; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + + parent = indirect_to_ptr(node); + offset = radix_tree_descend(parent, &node, offset); + BUG_ON(!node); + + if (!tag_get(parent, tag, offset)) + tag_set(parent, tag, offset); } /* set the root's tag bit */ - if (slot && !root_tag_get(root, tag)) + if (!root_tag_get(root, tag)) root_tag_set(root, tag); - return slot; + return node; } EXPORT_SYMBOL(radix_tree_tag_set); -- cgit v1.2.3-59-g8ed1b From 00f47b581105b91f8f18edd3074322ae609a8bc5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:35 -0700 Subject: radix-tree: rewrite radix_tree_tag_clear Use the new multi-order support functions to rewrite radix_tree_tag_clear() Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 5234a95024a3..ee56562e0a2d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -768,44 +768,40 @@ EXPORT_SYMBOL(radix_tree_tag_set); void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag) { - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot = NULL; - unsigned int height, shift; + struct radix_tree_node *node, *parent; + unsigned long maxindex; + unsigned int shift; int uninitialized_var(offset); - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; - - shift = height * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + shift = radix_tree_load_root(root, &node, &maxindex); + if (index > maxindex) + return NULL; - while (shift) { - if (slot == NULL) - goto out; - if (!radix_tree_is_indirect_ptr(slot)) - break; - slot = indirect_to_ptr(slot); + parent = NULL; + while (radix_tree_is_indirect_ptr(node)) { shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = slot->slots[offset]; + + parent = indirect_to_ptr(node); + offset = radix_tree_descend(parent, &node, offset); } - if (slot == NULL) + if (node == NULL) goto out; - while (node) { - if (!tag_get(node, tag, offset)) + index >>= shift; + + while (parent) { + if (!tag_get(parent, tag, offset)) goto out; - tag_clear(node, tag, offset); - if (any_tag_set(node, tag)) + tag_clear(parent, tag, offset); + if (any_tag_set(parent, tag)) goto out; index >>= RADIX_TREE_MAP_SHIFT; offset = index & RADIX_TREE_MAP_MASK; - node = node->parent; + parent = parent->parent; } /* clear the root's tag bit */ @@ -813,7 +809,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, root_tag_clear(root, tag); out: - return slot; + return node; } EXPORT_SYMBOL(radix_tree_tag_clear); -- cgit v1.2.3-59-g8ed1b From 4589ba6d0f439e4673830fdc65099179832ddde5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:38 -0700 Subject: radix-tree: rewrite radix_tree_tag_get Use the new multi-order support functions to rewrite radix_tree_tag_get() Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ee56562e0a2d..b1ca74489bc2 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -831,45 +831,37 @@ EXPORT_SYMBOL(radix_tree_tag_clear); int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) { - unsigned int height, shift; - struct radix_tree_node *node; + struct radix_tree_node *node, *parent; + unsigned long maxindex; + unsigned int shift; - /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; - node = rcu_dereference_raw(root->rnode); + shift = radix_tree_load_root(root, &node, &maxindex); + if (index > maxindex) + return 0; if (node == NULL) return 0; - if (!radix_tree_is_indirect_ptr(node)) - return (index == 0); - node = indirect_to_ptr(node); - - height = node->path & RADIX_TREE_HEIGHT_MASK; - if (index > radix_tree_maxindex(height)) - return 0; + while (radix_tree_is_indirect_ptr(node)) { + int offset; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + shift -= RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; - for ( ; ; ) { - int offset; + parent = indirect_to_ptr(node); + offset = radix_tree_descend(parent, &node, offset); - if (node == NULL) + if (!node) return 0; - node = indirect_to_ptr(node); - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!tag_get(node, tag, offset)) + if (!tag_get(parent, tag, offset)) return 0; - if (height == 1) - return 1; - node = rcu_dereference_raw(node->slots[offset]); - if (!radix_tree_is_indirect_ptr(node)) - return 1; - shift -= RADIX_TREE_MAP_SHIFT; - height--; + if (node == RADIX_TREE_RETRY) + break; } + + return 1; } EXPORT_SYMBOL(radix_tree_tag_get); -- cgit v1.2.3-59-g8ed1b From 8a14f4d8328cc8615f8a5487c4173f36a8314796 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:44 -0700 Subject: radix-tree: fix radix_tree_create for sibling entries If the radix tree user attempted to insert a colliding entry with an existing multiorder entry, then radix_tree_create() could encounter a sibling entry when walking down the tree to look for a slot. Use radix_tree_descend() to fix the problem, and add a test-case to make sure the problem doesn't come back in future. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 4 ++-- tools/testing/radix-tree/multiorder.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b1ca74489bc2..9b5d8a963897 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -548,9 +548,9 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, /* Go a level down */ height--; shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = indirect_to_ptr(slot); - slot = node->slots[offset]; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &slot, offset); } #ifdef CONFIG_RADIX_TREE_MULTIORDER diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 1b6fc9b19930..fc934578e1ef 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -135,6 +135,11 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); for (i = max; i < 2*max; i++) item_check_absent(&tree, i); + for (i = min; i < max; i++) { + static void *entry = (void *) + (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY); + assert(radix_tree_insert(&tree, i, entry) == -EEXIST); + } assert(item_delete(&tree, index) != 0); -- cgit v1.2.3-59-g8ed1b From 0a2efc6c809b01872321d9c7e7d82d59ac6fde10 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:46 -0700 Subject: radix-tree: rewrite radix_tree_locate_item Use the new multi-order support functions to rewrite radix_tree_locate_item(). Modify the locate tests to test multiorder entries too. [hughd@google.com: radix_tree_locate_item() is often returning the wrong index] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1605012108490.1166@eggly.anvils Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 87 ++++++++++++++++++++--------------------- tools/testing/radix-tree/main.c | 30 ++++++++------ 2 files changed, 61 insertions(+), 56 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9b5d8a963897..8329a2e950eb 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1303,58 +1303,54 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) #include /* for cond_resched() */ +struct locate_info { + unsigned long found_index; + bool stop; +}; + /* * This linear search is at present only useful to shmem_unuse_inode(). */ static unsigned long __locate(struct radix_tree_node *slot, void *item, - unsigned long index, unsigned long *found_index) + unsigned long index, struct locate_info *info) { unsigned int shift, height; unsigned long i; height = slot->path & RADIX_TREE_HEIGHT_MASK; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; - for ( ; height > 1; height--) { - i = (index >> shift) & RADIX_TREE_MAP_MASK; - for (;;) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } + do { + shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - goto out; - if (!radix_tree_is_indirect_ptr(slot)) { - if (slot == item) { - *found_index = index + i; - index = 0; - } else { - index += shift; + for (i = (index >> shift) & RADIX_TREE_MAP_MASK; + i < RADIX_TREE_MAP_SIZE; + i++, index += (1UL << shift)) { + struct radix_tree_node *node = + rcu_dereference_raw(slot->slots[i]); + if (node == RADIX_TREE_RETRY) + goto out; + if (!radix_tree_is_indirect_ptr(node)) { + if (node == item) { + info->found_index = index; + info->stop = true; + goto out; + } + continue; } - goto out; + node = indirect_to_ptr(node); + if (is_sibling_entry(slot, node)) + continue; + slot = node; + break; } - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - } + if (i == RADIX_TREE_MAP_SIZE) + break; + } while (shift); - /* Bottom level: check items */ - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] == item) { - *found_index = index + i; - index = 0; - goto out; - } - } - index += RADIX_TREE_MAP_SIZE; out: + if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) + info->stop = true; return index; } @@ -1372,7 +1368,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) struct radix_tree_node *node; unsigned long max_index; unsigned long cur_index = 0; - unsigned long found_index = -1; + struct locate_info info = { + .found_index = -1, + .stop = false, + }; do { rcu_read_lock(); @@ -1380,24 +1379,24 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) if (!radix_tree_is_indirect_ptr(node)) { rcu_read_unlock(); if (node == item) - found_index = 0; + info.found_index = 0; break; } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->path & - RADIX_TREE_HEIGHT_MASK); + + max_index = node_maxindex(node); if (cur_index > max_index) { rcu_read_unlock(); break; } - cur_index = __locate(node, item, cur_index, &found_index); + cur_index = __locate(node, item, cur_index, &info); rcu_read_unlock(); cond_resched(); - } while (cur_index != 0 && cur_index <= max_index); + } while (!info.stop && cur_index <= max_index); - return found_index; + return info.found_index; } #else unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b6a700b00cce..65231e9ba3e8 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,17 +232,18 @@ void copy_tag_check(void) item_kill_tree(&tree); } -void __locate_check(struct radix_tree_root *tree, unsigned long index) +void __locate_check(struct radix_tree_root *tree, unsigned long index, + unsigned order) { struct item *item; unsigned long index2; - item_insert(tree, index); + item_insert_order(tree, index, order); item = item_lookup(tree, index); index2 = radix_tree_locate_item(tree, item); if (index != index2) { - printf("index %ld inserted; found %ld\n", - index, index2); + printf("index %ld order %d inserted; found %ld\n", + index, order, index2); abort(); } } @@ -250,21 +251,26 @@ void __locate_check(struct radix_tree_root *tree, unsigned long index) static void locate_check(void) { RADIX_TREE(tree, GFP_KERNEL); + unsigned order; unsigned long offset, index; - for (offset = 0; offset < (1 << 3); offset++) { - for (index = 0; index < (1UL << 5); index++) { - __locate_check(&tree, index + offset); - } - if (radix_tree_locate_item(&tree, &tree) != -1) - abort(); + for (order = 0; order < 20; order++) { + for (offset = 0; offset < (1 << (order + 3)); + offset += (1UL << order)) { + for (index = 0; index < (1UL << (order + 5)); + index += (1UL << order)) { + __locate_check(&tree, index + offset, order); + } + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); - item_kill_tree(&tree); + item_kill_tree(&tree); + } } if (radix_tree_locate_item(&tree, &tree) != -1) abort(); - __locate_check(&tree, -1); + __locate_check(&tree, -1, 0); if (radix_tree_locate_item(&tree, &tree) != -1) abort(); item_kill_tree(&tree); -- cgit v1.2.3-59-g8ed1b From 070c5ac2740b5db89d381a09fb03b2480b2f7a74 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:52 -0700 Subject: radix-tree: fix radix_tree_range_tag_if_tagged() for multiorder entries I had previously decided that tagging a single multiorder entry would count as tagging 2^order entries for the purposes of 'nr_to_tag'. I now believe that decision to be a mistake, and it should count as a single entry. That's more likely to be what callers expect. When walking back up the tree from a newly-tagged entry, the current code assumed we were starting from the lowest level of the tree; if we have a multiorder entry with an order at least RADIX_TREE_MAP_SHIFT in size then we need to shift the index by 'shift' before we start walking back up the tree, or we will end up not setting tags on higher entries, and then mistakenly thinking that entries below a certain point in the tree are not tagged. If the first index we examine is a sibling entry of a tagged multiorder entry, we were not tagging it. We need to examine the canonical entry, and the easiest way to do that is to use radix_tree_descend(). We then have to skip over sibling slots when looking for the next entry in the tree or we will end up walking back to the canonical entry. Add several tests for radix_tree_range_tag_if_tagged(). Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 76 +++++++++++++++-------------------- tools/testing/radix-tree/multiorder.c | 25 +++++++++++- tools/testing/radix-tree/tag_check.c | 10 +++++ 3 files changed, 67 insertions(+), 44 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8329a2e950eb..8df0df2835b4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1033,14 +1033,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int iftag, unsigned int settag) { - unsigned int height = root->height; - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot; - unsigned int shift; + struct radix_tree_node *slot, *node = NULL; + unsigned long maxindex; + unsigned int shift = radix_tree_load_root(root, &slot, &maxindex); unsigned long tagged = 0; unsigned long index = *first_indexp; - last_index = min(last_index, radix_tree_maxindex(height)); + last_index = min(last_index, maxindex); if (index > last_index) return 0; if (!nr_to_tag) @@ -1049,80 +1048,71 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (height == 0) { + if (!radix_tree_is_indirect_ptr(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; } - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; for (;;) { unsigned long upindex; - int offset; + unsigned offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!slot->slots[offset]) + offset = radix_tree_descend(node, &slot, offset); + if (!slot) goto next; - if (!tag_get(slot, iftag, offset)) + if (!tag_get(node, iftag, offset)) goto next; - if (shift) { - node = slot; - slot = slot->slots[offset]; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - continue; - } else { - slot = node; - node = node->parent; - } + /* Sibling slots never have tags set on them */ + if (radix_tree_is_indirect_ptr(slot)) { + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; } /* tag the leaf */ - tagged += 1 << shift; - tag_set(slot, settag, offset); + tagged++; + tag_set(node, settag, offset); + slot = node->parent; /* walk back up the path tagging interior nodes */ - upindex = index; - while (node) { + upindex = index >> shift; + while (slot) { upindex >>= RADIX_TREE_MAP_SHIFT; offset = upindex & RADIX_TREE_MAP_MASK; /* stop if we find a node with the tag already set */ - if (tag_get(node, settag, offset)) + if (tag_get(slot, settag, offset)) break; - tag_set(node, settag, offset); - node = node->parent; + tag_set(slot, settag, offset); + slot = slot->parent; } - /* - * Small optimization: now clear that node pointer. - * Since all of this slot's ancestors now have the tag set - * from setting it above, we have no further need to walk - * back up the tree setting tags, until we update slot to - * point to another radix_tree_node. - */ - node = NULL; - -next: + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; /* Overflow can happen when last_index is ~0UL... */ if (index > last_index || !index) break; - if (tagged >= nr_to_tag) - break; - while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + while (offset == 0) { /* * We've fully scanned this node. Go up. Because * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. */ - slot = slot->parent; + node = node->parent; shift += RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; } + if (is_sibling_entry(node, node->slots[offset])) + goto next; + if (tagged >= nr_to_tag) + break; } /* * We need not to tag the root tag if there is no tag which is set with diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index fc934578e1ef..c061f4bd6c05 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -26,6 +26,7 @@ static void __multiorder_tag_test(int index, int order) { RADIX_TREE(tree, GFP_KERNEL); int base, err, i; + unsigned long first = 0; /* our canonical entry */ base = index & ~((1 << order) - 1); @@ -59,13 +60,16 @@ static void __multiorder_tag_test(int index, int order) assert(!radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1); assert(radix_tree_tag_clear(&tree, index, 0)); for_each_index(i, base, order) { assert(!radix_tree_tag_get(&tree, i, 0)); - assert(!radix_tree_tag_get(&tree, i, 1)); + assert(radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_tag_clear(&tree, index, 1)); + assert(!radix_tree_tagged(&tree, 0)); assert(!radix_tree_tagged(&tree, 1)); @@ -244,6 +248,7 @@ void multiorder_tagged_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; + unsigned long first = 0; int i; printf("Multiorder tagged iteration test\n"); @@ -280,6 +285,24 @@ void multiorder_tagged_iteration(void) i++; } + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 2); + + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { + assert(iter.index == tag_index[i]); + i++; + } + + first = 1; + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 0); + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { + assert(iter.index == tag_index[i]); + i++; + } + item_kill_tree(&tree); } diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 83136be552a0..b7447ceb75e9 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -12,6 +12,7 @@ static void __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) { + unsigned long first = 0; int ret; item_check_absent(tree, index); @@ -22,6 +23,10 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); + ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag); + assert(ret == 1); + ret = item_tag_get(tree, index, !tag); + assert(ret != 0); ret = item_delete(tree, index); assert(ret != 0); item_insert(tree, index); @@ -304,6 +309,7 @@ static void single_check(void) struct item *items[BATCH]; RADIX_TREE(tree, GFP_KERNEL); int ret; + unsigned long first = 0; item_insert(&tree, 0); item_tag_set(&tree, 0, 0); @@ -313,6 +319,10 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); + ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1); + assert(ret == 1); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); + assert(ret == 1); item_kill_tree(&tree); } -- cgit v1.2.3-59-g8ed1b From 0796c58325533f87c00949a545eb607baa8441cb Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:55 -0700 Subject: radix-tree: fix radix_tree_dump() for multi-order entries - Print which indices are covered by every leaf entry - Print sibling entries - Print the node pointer instead of the slot entry - Build by default in userspace, and make it accessible to the test-suite Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 48 +++++++++++++++++++++++++---------------- tools/testing/radix-tree/test.h | 1 + 2 files changed, 30 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8df0df2835b4..a1a44f94c171 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -215,27 +215,36 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } -#if 0 -static void dump_node(void *slot, int height, int offset) +#ifndef __KERNEL__ +static void dump_node(struct radix_tree_node *node, unsigned offset, + unsigned shift, unsigned long index) { - struct radix_tree_node *node; - int i; - - if (!slot) - return; - - if (height == 0) { - pr_debug("radix entry %p offset %d\n", slot, offset); - return; - } + unsigned long i; - node = indirect_to_ptr(slot); pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", - slot, offset, node->tags[0][0], node->tags[1][0], - node->tags[2][0], node->path, node->count, node->parent); - - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_node(node->slots[i], height - 1, i); + node, offset, + node->tags[0][0], node->tags[1][0], node->tags[2][0], + node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + unsigned long first = index | (i << shift); + unsigned long last = first | ((1UL << shift) - 1); + void *entry = node->slots[i]; + if (!entry) + continue; + if (is_sibling_entry(node, entry)) { + pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", + entry, i, + *(void **)indirect_to_ptr(entry), + first, last); + } else if (!radix_tree_is_indirect_ptr(entry)) { + pr_debug("radix entry %p offset %ld indices %ld-%ld\n", + entry, i, first, last); + } else { + dump_node(indirect_to_ptr(entry), i, + shift - RADIX_TREE_MAP_SHIFT, first); + } + } } /* For debug */ @@ -246,7 +255,8 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(root->rnode, root->height, 0); + dump_node(indirect_to_ptr(root->rnode), 0, + (root->height - 1) * RADIX_TREE_MAP_SHIFT, 0); } #endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 53cb595db44a..67217c93fe95 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -40,5 +40,6 @@ extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ void *indirect_to_ptr(void *ptr); +void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long radix_tree_maxindex(unsigned int height); -- cgit v1.2.3-59-g8ed1b From 6b053b8e5f5e12089fbbc127167199746bb5b4c1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:58 -0700 Subject: radix-tree: add copyright statements The multiorder support is a sufficiently large feature to be worth adding copyrigt lines for. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a1a44f94c171..1e75813b9f34 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -4,6 +4,8 @@ * Copyright (C) 2005 SGI, Christoph Lameter * Copyright (C) 2006 Nick Piggin * Copyright (C) 2012 Konstantin Khlebnikov + * Copyright (C) 2016 Intel, Matthew Wilcox + * Copyright (C) 2016 Intel, Ross Zwisler * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as -- cgit v1.2.3-59-g8ed1b From 2fcd9005cc03ab09ea2a940515ed728d43df66c4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:04 -0700 Subject: radix-tree: miscellaneous fixes Typos, whitespace, grammar, line length, using the correct types, etc. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 70 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1e75813b9f34..75944e42e4a0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -66,7 +66,7 @@ static struct kmem_cache *radix_tree_node_cachep; * Per-cpu pool of preloaded nodes */ struct radix_tree_preload { - int nr; + unsigned nr; /* nodes->private_data points to next preallocated node */ struct radix_tree_node *nodes; }; @@ -147,7 +147,7 @@ static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); } -static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) +static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); } @@ -159,7 +159,7 @@ static inline void root_tag_clear_all(struct radix_tree_root *root) static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) { - return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); + return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); } static inline unsigned root_tags_get(struct radix_tree_root *root) @@ -173,7 +173,7 @@ static inline unsigned root_tags_get(struct radix_tree_root *root) */ static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) { - int idx; + unsigned idx; for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { if (node->tags[tag][idx]) return 1; @@ -273,9 +273,9 @@ radix_tree_node_alloc(struct radix_tree_root *root) gfp_t gfp_mask = root_gfp_mask(root); /* - * Preload code isn't irq safe and it doesn't make sence to use - * preloading in the interrupt anyway as all the allocations have to - * be atomic. So just do normal allocation when in interrupt. + * Preload code isn't irq safe and it doesn't make sense to use + * preloading during an interrupt anyway as all the allocations have + * to be atomic. So just do normal allocation when in interrupt. */ if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) { struct radix_tree_preload *rtp; @@ -448,7 +448,6 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) { - struct radix_tree_node *node; struct radix_tree_node *slot; unsigned int height; int tag; @@ -465,7 +464,9 @@ static int radix_tree_extend(struct radix_tree_root *root, do { unsigned int newheight; - if (!(node = radix_tree_node_alloc(root))) + struct radix_tree_node *node = radix_tree_node_alloc(root); + + if (!node) return -ENOMEM; /* Propagate the aggregated tag info into the new root */ @@ -542,7 +543,8 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, while (shift > order) { if (slot == NULL) { /* Have to add a child node. */ - if (!(slot = radix_tree_node_alloc(root))) + slot = radix_tree_node_alloc(root); + if (!slot) return -ENOMEM; slot->path = height; slot->parent = node; @@ -722,13 +724,13 @@ EXPORT_SYMBOL(radix_tree_lookup); * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root * @index: index key - * @tag: tag index + * @tag: tag index * * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) * corresponding to @index in the radix tree. From * the root all the way down to the leaf node. * - * Returns the address of the tagged item. Setting a tag on a not-present + * Returns the address of the tagged item. Setting a tag on a not-present * item is a bug. */ void *radix_tree_tag_set(struct radix_tree_root *root, @@ -767,11 +769,11 @@ EXPORT_SYMBOL(radix_tree_tag_set); * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root * @index: index key - * @tag: tag index + * @tag: tag index * * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) - * corresponding to @index in the radix tree. If - * this causes the leaf node to have no tags set then clear the tag in the + * corresponding to @index in the radix tree. If this causes + * the leaf node to have no tags set then clear the tag in the * next-to-leaf node, etc. * * Returns the address of the tagged item on success, else NULL. ie: @@ -829,7 +831,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear); * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root * @index: index key - * @tag: tag index (< RADIX_TREE_MAX_TAGS) + * @tag: tag index (< RADIX_TREE_MAX_TAGS) * * Return values: * @@ -1035,7 +1037,7 @@ EXPORT_SYMBOL(radix_tree_next_chunk); * set is outside the range we are scanning. This reults in dangling tags and * can lead to problems with later tag operations (e.g. livelocks on lookups). * - * The function returns number of leaves where the tag was set and sets + * The function returns the number of leaves where the tag was set and sets * *first_indexp to the first unscanned index. * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must * be prepared to handle that. @@ -1153,9 +1155,10 @@ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); * * Like radix_tree_lookup, radix_tree_gang_lookup may be called under * rcu_read_lock. In this case, rather than the returned results being - * an atomic snapshot of the tree at a single point in time, the semantics - * of an RCU protected gang lookup are as though multiple radix_tree_lookups - * have been issued in individual locks, and results stored in 'results'. + * an atomic snapshot of the tree at a single point in time, the + * semantics of an RCU protected gang lookup are as though multiple + * radix_tree_lookups have been issued in individual locks, and results + * stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, @@ -1460,7 +1463,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * their slot to become empty sooner or later. * * For example, lockless pagecache will look up a slot, deref - * the page pointer, and if the page is 0 refcount it means it + * the page pointer, and if the page has 0 refcount it means it * was concurrently deleted from pagecache so try the deref * again. Fortunately there is already a requirement for logic * to retry the entire slot lookup -- the indirect pointer @@ -1649,24 +1652,23 @@ static __init void radix_tree_init_maxindex(void) } static int radix_tree_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) + unsigned long action, void *hcpu) { - int cpu = (long)hcpu; - struct radix_tree_preload *rtp; - struct radix_tree_node *node; - - /* Free per-cpu pool of perloaded nodes */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - rtp = &per_cpu(radix_tree_preloads, cpu); - while (rtp->nr) { + int cpu = (long)hcpu; + struct radix_tree_preload *rtp; + struct radix_tree_node *node; + + /* Free per-cpu pool of preloaded nodes */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + rtp = &per_cpu(radix_tree_preloads, cpu); + while (rtp->nr) { node = rtp->nodes; rtp->nodes = node->private_data; kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; - } - } - return NOTIFY_OK; + } + } + return NOTIFY_OK; } void __init radix_tree_init(void) -- cgit v1.2.3-59-g8ed1b From 0c7fa0a8418cbe0e8963fe36db9575d03b8589f7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:07 -0700 Subject: radix-tree: split node->path into offset and height Neither piece of information we're storing in node->path can be larger than 64, so store each in its own unsigned char instead of shifting and masking to store them both in an unsigned int. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 7 ++----- lib/radix-tree.c | 38 +++++++++++++++++--------------------- 2 files changed, 19 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 8558d52e1c7b..2d2ad9d685a3 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -84,16 +84,13 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) -/* Height component in node->path */ -#define RADIX_TREE_HEIGHT_SHIFT (RADIX_TREE_MAX_PATH + 1) -#define RADIX_TREE_HEIGHT_MASK ((1UL << RADIX_TREE_HEIGHT_SHIFT) - 1) - /* Internally used bits of node->count */ #define RADIX_TREE_COUNT_SHIFT (RADIX_TREE_MAP_SHIFT + 1) #define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) struct radix_tree_node { - unsigned int path; /* Offset in parent & height from the bottom */ + unsigned char height; /* From the bottom */ + unsigned char offset; /* Slot offset in parent */ unsigned int count; union { struct { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 75944e42e4a0..dd04b51e5fbb 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -218,15 +218,15 @@ radix_tree_find_next_bit(const unsigned long *addr, } #ifndef __KERNEL__ -static void dump_node(struct radix_tree_node *node, unsigned offset, +static void dump_node(struct radix_tree_node *node, unsigned shift, unsigned long index) { unsigned long i; - pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", - node, offset, + pr_debug("radix node: %p offset %d tags %lx %lx %lx height %d count %d parent %p\n", + node, node->offset, node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->path, node->count, node->parent); + node->height, node->count, node->parent); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { unsigned long first = index | (i << shift); @@ -243,7 +243,7 @@ static void dump_node(struct radix_tree_node *node, unsigned offset, pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { - dump_node(indirect_to_ptr(entry), i, + dump_node(indirect_to_ptr(entry), shift - RADIX_TREE_MAP_SHIFT, first); } } @@ -257,7 +257,7 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(indirect_to_ptr(root->rnode), 0, + dump_node(indirect_to_ptr(root->rnode), (root->height - 1) * RADIX_TREE_MAP_SHIFT, 0); } #endif @@ -421,7 +421,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) static inline unsigned long node_maxindex(struct radix_tree_node *node) { - return radix_tree_maxindex(node->path & RADIX_TREE_HEIGHT_MASK); + return radix_tree_maxindex(node->height); } static unsigned radix_tree_load_root(struct radix_tree_root *root, @@ -434,8 +434,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, if (likely(radix_tree_is_indirect_ptr(node))) { node = indirect_to_ptr(node); *maxindex = node_maxindex(node); - return (node->path & RADIX_TREE_HEIGHT_MASK) * - RADIX_TREE_MAP_SHIFT; + return node->height * RADIX_TREE_MAP_SHIFT; } *maxindex = 0; @@ -476,9 +475,10 @@ static int radix_tree_extend(struct radix_tree_root *root, } /* Increase the height. */ - newheight = root->height+1; - BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK); - node->path = newheight; + newheight = root->height + 1; + BUG_ON(newheight > BITS_PER_LONG); + node->height = newheight; + node->offset = 0; node->count = 1; node->parent = NULL; slot = root->rnode; @@ -546,13 +546,13 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = radix_tree_node_alloc(root); if (!slot) return -ENOMEM; - slot->path = height; + slot->height = height; + slot->offset = offset; slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], ptr_to_indirect(slot)); node->count++; - slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); @@ -1319,11 +1319,10 @@ struct locate_info { static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned long index, struct locate_info *info) { - unsigned int shift, height; + unsigned int shift; unsigned long i; - height = slot->path & RADIX_TREE_HEIGHT_MASK; - shift = height * RADIX_TREE_MAP_SHIFT; + shift = slot->height * RADIX_TREE_MAP_SHIFT; do { shift -= RADIX_TREE_MAP_SHIFT; @@ -1508,10 +1507,7 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, parent = node->parent; if (parent) { - unsigned int offset; - - offset = node->path >> RADIX_TREE_HEIGHT_SHIFT; - parent->slots[offset] = NULL; + parent->slots[node->offset] = NULL; parent->count--; } else { root_tag_clear_all(root); -- cgit v1.2.3-59-g8ed1b From c12e51b07b3ac4c188fd91a82f96840fdb9cca6f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:10 -0700 Subject: radix-tree: replace node->height with node->shift node->shift represents the shift necessary for looking in the slots array at this level. It is equal to the old (node->height - 1) * RADIX_TREE_MAP_SHIFT. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- lib/radix-tree.c | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 2d2ad9d685a3..037458257e12 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -89,7 +89,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_COUNT_MASK ((1UL << RADIX_TREE_COUNT_SHIFT) - 1) struct radix_tree_node { - unsigned char height; /* From the bottom */ + unsigned char shift; /* Bits remaining in each slot */ unsigned char offset; /* Slot offset in parent */ unsigned int count; union { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index dd04b51e5fbb..648da9080418 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -223,10 +223,10 @@ static void dump_node(struct radix_tree_node *node, { unsigned long i; - pr_debug("radix node: %p offset %d tags %lx %lx %lx height %d count %d parent %p\n", + pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n", node, node->offset, node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->height, node->count, node->parent); + node->shift, node->count, node->parent); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { unsigned long first = index | (i << shift); @@ -419,9 +419,14 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) return height_to_maxindex[height]; } +static inline unsigned long shift_maxindex(unsigned int shift) +{ + return (RADIX_TREE_MAP_SIZE << shift) - 1; +} + static inline unsigned long node_maxindex(struct radix_tree_node *node) { - return radix_tree_maxindex(node->height); + return shift_maxindex(node->shift); } static unsigned radix_tree_load_root(struct radix_tree_root *root, @@ -434,7 +439,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, if (likely(radix_tree_is_indirect_ptr(node))) { node = indirect_to_ptr(node); *maxindex = node_maxindex(node); - return node->height * RADIX_TREE_MAP_SHIFT; + return node->shift + RADIX_TREE_MAP_SHIFT; } *maxindex = 0; @@ -475,9 +480,9 @@ static int radix_tree_extend(struct radix_tree_root *root, } /* Increase the height. */ - newheight = root->height + 1; + newheight = root->height; BUG_ON(newheight > BITS_PER_LONG); - node->height = newheight; + node->shift = newheight * RADIX_TREE_MAP_SHIFT; node->offset = 0; node->count = 1; node->parent = NULL; @@ -490,7 +495,7 @@ static int radix_tree_extend(struct radix_tree_root *root, node->slots[0] = slot; node = ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); - root->height = newheight; + root->height = ++newheight; } while (height > root->height); out: return height * RADIX_TREE_MAP_SHIFT; @@ -519,7 +524,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, { struct radix_tree_node *node = NULL, *slot; unsigned long maxindex; - unsigned int height, shift, offset; + unsigned int shift, offset; unsigned long max = index | ((1UL << order) - 1); shift = radix_tree_load_root(root, &slot, &maxindex); @@ -537,16 +542,15 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } } - height = root->height; - offset = 0; /* uninitialised var warning */ while (shift > order) { + shift -= RADIX_TREE_MAP_SHIFT; if (slot == NULL) { /* Have to add a child node. */ slot = radix_tree_node_alloc(root); if (!slot) return -ENOMEM; - slot->height = height; + slot->shift = shift; slot->offset = offset; slot->parent = node; if (node) { @@ -560,8 +564,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, break; /* Go a level down */ - height--; - shift -= RADIX_TREE_MAP_SHIFT; node = indirect_to_ptr(slot); offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(node, &slot, offset); @@ -1322,7 +1324,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned int shift; unsigned long i; - shift = slot->height * RADIX_TREE_MAP_SHIFT; + shift = slot->shift + RADIX_TREE_MAP_SHIFT; do { shift -= RADIX_TREE_MAP_SHIFT; -- cgit v1.2.3-59-g8ed1b From fb209019c92a9141fd73f3c4928edc1b299b3490 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:13 -0700 Subject: radix-tree: remove a use of root->height from delete_node If radix_tree_shrink returns whether it managed to shrink, then __radix_tree_delete_node doesn't ned to query the tree to find out whether it did any work or not. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 648da9080418..75c9e6197b5b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1415,8 +1415,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) * radix_tree_shrink - shrink height of a radix tree to minimal * @root radix tree root */ -static inline void radix_tree_shrink(struct radix_tree_root *root) +static inline bool radix_tree_shrink(struct radix_tree_root *root) { + bool shrunk = false; + /* try to shrink tree height */ while (root->height > 0) { struct radix_tree_node *to_free = root->rnode; @@ -1476,7 +1478,10 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); + shrunk = true; } + + return shrunk; } /** @@ -1499,11 +1504,8 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *parent; if (node->count) { - if (node == indirect_to_ptr(root->rnode)) { - radix_tree_shrink(root); - if (root->height == 0) - deleted = true; - } + if (node == indirect_to_ptr(root->rnode)) + deleted |= radix_tree_shrink(root); return deleted; } -- cgit v1.2.3-59-g8ed1b From d0891265bbc988dc91ed8580b38eb3dac128581b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:19 -0700 Subject: radix-tree: remove root->height The only remaining references to root->height were in extend and shrink, where it was updated. Now we can remove it entirely. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 3 -- lib/radix-tree.c | 106 +++++++++++++-------------------------------- 2 files changed, 31 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 037458257e12..c0d223cfac00 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -110,13 +110,11 @@ struct radix_tree_node { /* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ struct radix_tree_root { - unsigned int height; gfp_t gfp_mask; struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ - .height = 0, \ .gfp_mask = (mask), \ .rnode = NULL, \ } @@ -126,7 +124,6 @@ struct radix_tree_root { #define INIT_RADIX_TREE(root, mask) \ do { \ - (root)->height = 0; \ (root)->gfp_mask = (mask); \ (root)->rnode = NULL; \ } while (0) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 75c9e6197b5b..58f79fee8c71 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -38,12 +38,6 @@ #include /* in_interrupt() */ -/* - * The height_to_maxindex array needs to be one deeper than the maximum - * path as height 0 holds only 1 entry. - */ -static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; - /* * Radix tree node cache. */ @@ -218,8 +212,7 @@ radix_tree_find_next_bit(const unsigned long *addr, } #ifndef __KERNEL__ -static void dump_node(struct radix_tree_node *node, - unsigned shift, unsigned long index) +static void dump_node(struct radix_tree_node *node, unsigned long index) { unsigned long i; @@ -229,8 +222,8 @@ static void dump_node(struct radix_tree_node *node, node->shift, node->count, node->parent); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - unsigned long first = index | (i << shift); - unsigned long last = first | ((1UL << shift) - 1); + unsigned long first = index | (i << node->shift); + unsigned long last = first | ((1UL << node->shift) - 1); void *entry = node->slots[i]; if (!entry) continue; @@ -243,8 +236,7 @@ static void dump_node(struct radix_tree_node *node, pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { - dump_node(indirect_to_ptr(entry), - shift - RADIX_TREE_MAP_SHIFT, first); + dump_node(indirect_to_ptr(entry), first); } } } @@ -252,13 +244,12 @@ static void dump_node(struct radix_tree_node *node, /* For debug */ static void radix_tree_dump(struct radix_tree_root *root) { - pr_debug("radix root: %p height %d rnode %p tags %x\n", - root, root->height, root->rnode, + pr_debug("radix root: %p rnode %p tags %x\n", + root, root->rnode, root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(indirect_to_ptr(root->rnode), - (root->height - 1) * RADIX_TREE_MAP_SHIFT, 0); + dump_node(indirect_to_ptr(root->rnode), 0); } #endif @@ -411,14 +402,8 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) EXPORT_SYMBOL(radix_tree_maybe_preload); /* - * Return the maximum key which can be store into a - * radix tree with height HEIGHT. + * The maximum index which can be stored in a radix tree */ -static inline unsigned long radix_tree_maxindex(unsigned int height) -{ - return height_to_maxindex[height]; -} - static inline unsigned long shift_maxindex(unsigned int shift) { return (RADIX_TREE_MAP_SIZE << shift) - 1; @@ -450,24 +435,22 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, * Extend a radix tree so it can store key @index. */ static int radix_tree_extend(struct radix_tree_root *root, - unsigned long index) + unsigned long index, unsigned int shift) { struct radix_tree_node *slot; - unsigned int height; + unsigned int maxshift; int tag; - /* Figure out what the height should be. */ - height = root->height + 1; - while (index > radix_tree_maxindex(height)) - height++; + /* Figure out what the shift should be. */ + maxshift = shift; + while (index > shift_maxindex(maxshift)) + maxshift += RADIX_TREE_MAP_SHIFT; - if (root->rnode == NULL) { - root->height = height; + slot = root->rnode; + if (!slot) goto out; - } do { - unsigned int newheight; struct radix_tree_node *node = radix_tree_node_alloc(root); if (!node) @@ -479,14 +462,11 @@ static int radix_tree_extend(struct radix_tree_root *root, tag_set(node, tag, 0); } - /* Increase the height. */ - newheight = root->height; - BUG_ON(newheight > BITS_PER_LONG); - node->shift = newheight * RADIX_TREE_MAP_SHIFT; + BUG_ON(shift > BITS_PER_LONG); + node->shift = shift; node->offset = 0; node->count = 1; node->parent = NULL; - slot = root->rnode; if (radix_tree_is_indirect_ptr(slot)) { slot = indirect_to_ptr(slot); slot->parent = node; @@ -495,10 +475,11 @@ static int radix_tree_extend(struct radix_tree_root *root, node->slots[0] = slot; node = ptr_to_indirect(node); rcu_assign_pointer(root->rnode, node); - root->height = ++newheight; - } while (height > root->height); + shift += RADIX_TREE_MAP_SHIFT; + slot = node; + } while (shift <= maxshift); out: - return height * RADIX_TREE_MAP_SHIFT; + return maxshift + RADIX_TREE_MAP_SHIFT; } /** @@ -531,15 +512,13 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, /* Make sure the tree is high enough. */ if (max > maxindex) { - int error = radix_tree_extend(root, max); + int error = radix_tree_extend(root, max, shift); if (error < 0) return error; shift = error; slot = root->rnode; - if (order == shift) { + if (order == shift) shift += RADIX_TREE_MAP_SHIFT; - root->height++; - } } offset = 0; /* uninitialised var warning */ @@ -1412,32 +1391,32 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) #endif /* CONFIG_SHMEM && CONFIG_SWAP */ /** - * radix_tree_shrink - shrink height of a radix tree to minimal + * radix_tree_shrink - shrink radix tree to minimum height * @root radix tree root */ static inline bool radix_tree_shrink(struct radix_tree_root *root) { bool shrunk = false; - /* try to shrink tree height */ - while (root->height > 0) { + for (;;) { struct radix_tree_node *to_free = root->rnode; struct radix_tree_node *slot; - BUG_ON(!radix_tree_is_indirect_ptr(to_free)); + if (!radix_tree_is_indirect_ptr(to_free)) + break; to_free = indirect_to_ptr(to_free); /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, or it is a multiorder entry, - * we cannot shrink. + * is not at the leftmost slot, or the child is a multiorder + * entry, we cannot shrink. */ if (to_free->count != 1) break; slot = to_free->slots[0]; if (!slot) break; - if (!radix_tree_is_indirect_ptr(slot) && (root->height > 1)) + if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) break; if (radix_tree_is_indirect_ptr(slot)) { @@ -1454,7 +1433,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) * one (root->rnode) as far as dependent read barriers go. */ root->rnode = slot; - root->height--; /* * We have a dilemma here. The node's slot[0] must not be @@ -1515,7 +1493,6 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, parent->count--; } else { root_tag_clear_all(root); - root->height = 0; root->rnode = NULL; } @@ -1631,26 +1608,6 @@ radix_tree_node_ctor(void *arg) INIT_LIST_HEAD(&node->private_list); } -static __init unsigned long __maxindex(unsigned int height) -{ - unsigned int width = height * RADIX_TREE_MAP_SHIFT; - int shift = RADIX_TREE_INDEX_BITS - width; - - if (shift < 0) - return ~0UL; - if (shift >= BITS_PER_LONG) - return 0UL; - return ~0UL >> shift; -} - -static __init void radix_tree_init_maxindex(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); -} - static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1677,6 +1634,5 @@ void __init radix_tree_init(void) sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); - radix_tree_init_maxindex(); hotcpu_notifier(radix_tree_callback, 0); } -- cgit v1.2.3-59-g8ed1b From 30ff46ccb303fb6f6c28b9aa9f2cdc4ba900ed3f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:22 -0700 Subject: radix-tree: rename INDIRECT_PTR to INTERNAL_NODE The name RADIX_TREE_INDIRECT_PTR doesn't really match the meaning. RADIX_TREE_INTERNAL_NODE is a better name. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 30 +++++++++++++----------------- lib/radix-tree.c | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c0d223cfac00..c8cc879046c7 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -29,20 +29,16 @@ #include /* - * An indirect pointer (root->rnode pointing to a radix_tree_node, rather - * than a data item) is signalled by the low bit set in the root->rnode - * pointer. - * - * In this case root->height is > 0, but the indirect pointer tests are - * needed for RCU lookups (because root->height is unreliable). The only - * time callers need worry about this is when doing a lookup_slot under - * RCU. - * - * Indirect pointer in fact is also used to tag the last pointer of a node - * when it is shrunk, before we rcu free the node. See shrink code for - * details. + * Entries in the radix tree have the low bit set if they refer to a + * radix_tree_node. If the low bit is clear then the entry is user data. + * + * We also use the low bit to indicate that the slot will be freed in the + * next RCU idle period, and users need to re-walk the tree to find the + * new slot for the index that they were looking for. See the comment in + * radix_tree_shrink() for details. */ -#define RADIX_TREE_INDIRECT_PTR 1 +#define RADIX_TREE_INTERNAL_NODE 1 + /* * A common use of the radix tree is to store pointers to struct pages; * but shmem/tmpfs needs also to store swap entries in the same tree: @@ -63,7 +59,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) { - return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); + return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } /*** radix-tree API starts here ***/ @@ -228,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely((unsigned long)arg & RADIX_TREE_INDIRECT_PTR); + return unlikely(radix_tree_is_indirect_ptr(arg)); } /** @@ -250,7 +246,7 @@ static inline int radix_tree_exceptional_entry(void *arg) static inline int radix_tree_exception(void *arg) { return unlikely((unsigned long)arg & - (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY)); + (RADIX_TREE_INTERNAL_NODE | RADIX_TREE_EXCEPTIONAL_ENTRY)); } /** @@ -448,7 +444,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) static inline void *indirect_to_ptr(void *ptr) { - return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); + return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } /** diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 58f79fee8c71..31d5929a625b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -68,7 +68,7 @@ static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; static inline void *ptr_to_indirect(void *ptr) { - return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); + return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } #define RADIX_TREE_RETRY ptr_to_indirect(NULL) -- cgit v1.2.3-59-g8ed1b From a4db4dcea1b3990e8c5dc8a03d11f36a3c0c6d8b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:24 -0700 Subject: radix-tree: rename ptr_to_indirect() to node_to_entry() ptr_to_indirect() was a bad name. What it really means is "Convert this pointer to a node into an entry suitable for storing in the radix tree". So node_to_entry() seemed like a better name. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 31d5929a625b..f66bb3932452 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -66,12 +66,12 @@ struct radix_tree_preload { }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; -static inline void *ptr_to_indirect(void *ptr) +static inline void *node_to_entry(void *ptr) { return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } -#define RADIX_TREE_RETRY ptr_to_indirect(NULL) +#define RADIX_TREE_RETRY node_to_entry(NULL) #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Sibling slots point directly to another slot in the same node */ @@ -470,13 +470,12 @@ static int radix_tree_extend(struct radix_tree_root *root, if (radix_tree_is_indirect_ptr(slot)) { slot = indirect_to_ptr(slot); slot->parent = node; - slot = ptr_to_indirect(slot); + slot = node_to_entry(slot); } node->slots[0] = slot; - node = ptr_to_indirect(node); - rcu_assign_pointer(root->rnode, node); + slot = node_to_entry(node); + rcu_assign_pointer(root->rnode, slot); shift += RADIX_TREE_MAP_SHIFT; - slot = node; } while (shift <= maxshift); out: return maxshift + RADIX_TREE_MAP_SHIFT; @@ -534,11 +533,11 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], - ptr_to_indirect(slot)); + node_to_entry(slot)); node->count++; } else rcu_assign_pointer(root->rnode, - ptr_to_indirect(slot)); + node_to_entry(slot)); } else if (!radix_tree_is_indirect_ptr(slot)) break; @@ -553,7 +552,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (order > shift) { int i, n = 1 << (order - shift); offset = offset & ~(n - 1); - slot = ptr_to_indirect(&node->slots[offset]); + slot = node_to_entry(&node->slots[offset]); for (i = 0; i < n; i++) { if (node->slots[offset + i]) return -EEXIST; @@ -1422,7 +1421,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (radix_tree_is_indirect_ptr(slot)) { slot = indirect_to_ptr(slot); slot->parent = NULL; - slot = ptr_to_indirect(slot); + slot = node_to_entry(slot); } /* @@ -1563,7 +1562,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } - delete_sibling_entries(node, ptr_to_indirect(slot), offset); + delete_sibling_entries(node, node_to_entry(slot), offset); node->slots[offset] = NULL; node->count--; -- cgit v1.2.3-59-g8ed1b From 4dd6c0987ca43d6544f4f0a3f86f6ea3bfc60fc1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:27 -0700 Subject: radix-tree: rename indirect_to_ptr() to entry_to_node() Mirrors the earlier commit introducing node_to_entry(). Also change the type returned to be a struct radix_tree_node pointer. That lets us simplify a couple of places in the radix tree shrink & extend paths where we could convert an entry into a pointer, modify the node, then convert the pointer back into an entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 +++++------ lib/radix-tree.c | 48 ++++++++++++++++++----------------------- tools/testing/radix-tree/test.c | 4 ++-- tools/testing/radix-tree/test.h | 1 - 4 files changed, 28 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c8cc879046c7..b94aa198dd6b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -442,7 +442,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) return (iter->next_index - iter->index) >> iter_shift(iter); } -static inline void *indirect_to_ptr(void *ptr) +static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } @@ -469,7 +469,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(slot[1])) { - if (indirect_to_ptr(slot[1]) == canon) { + if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); slot++; @@ -499,12 +499,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(*slot)) { - if (indirect_to_ptr(*slot) == canon) + if (entry_to_node(*slot) == canon) continue; - else { - iter->next_index = iter->index; - break; - } + iter->next_index = iter->index; + break; } if (likely(*slot)) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f66bb3932452..3c3fdd9c5bb3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -230,13 +230,13 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) if (is_sibling_entry(node, entry)) { pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", entry, i, - *(void **)indirect_to_ptr(entry), + *(void **)entry_to_node(entry), first, last); } else if (!radix_tree_is_indirect_ptr(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { - dump_node(indirect_to_ptr(entry), first); + dump_node(entry_to_node(entry), first); } } } @@ -249,7 +249,7 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(indirect_to_ptr(root->rnode), 0); + dump_node(entry_to_node(root->rnode), 0); } #endif @@ -422,7 +422,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; if (likely(radix_tree_is_indirect_ptr(node))) { - node = indirect_to_ptr(node); + node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; } @@ -467,11 +467,8 @@ static int radix_tree_extend(struct radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = node; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); rcu_assign_pointer(root->rnode, slot); @@ -542,7 +539,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, break; /* Go a level down */ - node = indirect_to_ptr(slot); + node = entry_to_node(slot); offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(node, &slot, offset); } @@ -645,7 +642,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (node == RADIX_TREE_RETRY) goto restart; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(parent, &node, offset); @@ -729,7 +726,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); BUG_ON(!node); @@ -777,7 +774,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); } @@ -844,7 +841,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); if (!node) @@ -904,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; if (radix_tree_is_indirect_ptr(rnode)) { - rnode = indirect_to_ptr(rnode); + rnode = entry_to_node(rnode); } else if (rnode) { /* Single-slot tree */ iter->index = index; @@ -963,7 +960,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(slot)) break; - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -1048,7 +1045,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, return 1; } - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; for (;;) { @@ -1063,7 +1060,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; /* Sibling slots never have tags set on them */ if (radix_tree_is_indirect_ptr(slot)) { - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; } @@ -1322,7 +1319,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, } continue; } - node = indirect_to_ptr(node); + node = entry_to_node(node); if (is_sibling_entry(slot, node)) continue; slot = node; @@ -1367,7 +1364,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) break; } - node = indirect_to_ptr(node); + node = entry_to_node(node); max_index = node_maxindex(node); if (cur_index > max_index) { @@ -1403,7 +1400,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(to_free)) break; - to_free = indirect_to_ptr(to_free); + to_free = entry_to_node(to_free); /* * The candidate node has more than one child, or its child @@ -1418,11 +1415,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply @@ -1481,7 +1475,7 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *parent; if (node->count) { - if (node == indirect_to_ptr(root->rnode)) + if (node == entry_to_node(root->rnode)) deleted |= radix_tree_shrink(root); return deleted; } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 3004c58b9021..7b0bc1fa5919 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -149,7 +149,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, int i; int j; - slot = indirect_to_ptr(slot); + slot = entry_to_node(slot); /* Verify consistency at this level */ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) { @@ -227,7 +227,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) return; } - node = indirect_to_ptr(node); + node = entry_to_node(node); assert(maxindex <= node_maxindex(node)); shift = node->shift; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 866c8c676aa4..e85131369723 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -39,7 +39,6 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ -void *indirect_to_ptr(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); -- cgit v1.2.3-59-g8ed1b From b194d16c27af905d6e3552f4851bc7d9fee4e90f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:30 -0700 Subject: radix-tree: rename radix_tree_is_indirect_ptr() As with indirect_to_ptr(), ptr_to_indirect() and RADIX_TREE_INDIRECT_PTR, change radix_tree_is_indirect_ptr() to radix_tree_is_internal_node(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 10 ++++----- lib/radix-tree.c | 48 ++++++++++++++++++++--------------------- tools/testing/radix-tree/test.c | 4 ++-- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b94aa198dd6b..bad63105e37e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -57,7 +57,7 @@ #define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) -static inline int radix_tree_is_indirect_ptr(void *ptr) +static inline int radix_tree_is_internal_node(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } @@ -224,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely(radix_tree_is_indirect_ptr(arg)); + return unlikely(radix_tree_is_internal_node(arg)); } /** @@ -259,7 +259,7 @@ static inline int radix_tree_exception(void *arg) */ static inline void radix_tree_replace_slot(void **pslot, void *item) { - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); rcu_assign_pointer(*pslot, item); } @@ -468,7 +468,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (unlikely(!iter->tags)) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(slot[1])) { + radix_tree_is_internal_node(slot[1])) { if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); @@ -498,7 +498,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index = __radix_tree_iter_add(iter, 1); if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(*slot)) { + radix_tree_is_internal_node(*slot)) { if (entry_to_node(*slot) == canon) continue; iter->next_index = iter->index; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3c3fdd9c5bb3..b65c83036ca4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -100,7 +100,7 @@ static unsigned radix_tree_descend(struct radix_tree_node *parent, void **entry = rcu_dereference_raw(parent->slots[offset]); #ifdef CONFIG_RADIX_TREE_MULTIORDER - if (radix_tree_is_indirect_ptr(entry)) { + if (radix_tree_is_internal_node(entry)) { unsigned long siboff = get_slot_offset(parent, entry); if (siboff < RADIX_TREE_MAP_SIZE) { offset = siboff; @@ -232,7 +232,7 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) entry, i, *(void **)entry_to_node(entry), first, last); - } else if (!radix_tree_is_indirect_ptr(entry)) { + } else if (!radix_tree_is_internal_node(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { @@ -247,7 +247,7 @@ static void radix_tree_dump(struct radix_tree_root *root) pr_debug("radix root: %p rnode %p tags %x\n", root, root->rnode, root->gfp_mask >> __GFP_BITS_SHIFT); - if (!radix_tree_is_indirect_ptr(root->rnode)) + if (!radix_tree_is_internal_node(root->rnode)) return; dump_node(entry_to_node(root->rnode), 0); } @@ -302,7 +302,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_ACCOUNT); out: - BUG_ON(radix_tree_is_indirect_ptr(ret)); + BUG_ON(radix_tree_is_internal_node(ret)); return ret; } @@ -421,7 +421,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; - if (likely(radix_tree_is_indirect_ptr(node))) { + if (likely(radix_tree_is_internal_node(node))) { node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; @@ -467,7 +467,7 @@ static int radix_tree_extend(struct radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); @@ -535,7 +535,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, node_to_entry(slot)); - } else if (!radix_tree_is_indirect_ptr(slot)) + } else if (!radix_tree_is_internal_node(slot)) break; /* Go a level down */ @@ -585,7 +585,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, void **slot; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); error = __radix_tree_create(root, index, order, &node, &slot); if (error) @@ -637,7 +637,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (index > maxindex) return NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; if (node == RADIX_TREE_RETRY) @@ -720,7 +720,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift = radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -770,7 +770,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, parent = NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -835,7 +835,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { int offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -900,7 +900,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (index > maxindex) return NULL; - if (radix_tree_is_indirect_ptr(rnode)) { + if (radix_tree_is_internal_node(rnode)) { rnode = entry_to_node(rnode); } else if (rnode) { /* Single-slot tree */ @@ -957,7 +957,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) break; node = entry_to_node(slot); @@ -1039,7 +1039,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (!radix_tree_is_indirect_ptr(slot)) { + if (!radix_tree_is_internal_node(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; @@ -1059,7 +1059,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(node, iftag, offset)) goto next; /* Sibling slots never have tags set on them */ - if (radix_tree_is_indirect_ptr(slot)) { + if (radix_tree_is_internal_node(slot)) { node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; @@ -1152,7 +1152,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1235,7 +1235,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1311,7 +1311,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, rcu_dereference_raw(slot->slots[i]); if (node == RADIX_TREE_RETRY) goto out; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { if (node == item) { info->found_index = index; info->stop = true; @@ -1357,7 +1357,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) do { rcu_read_lock(); node = rcu_dereference_raw(root->rnode); - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { rcu_read_unlock(); if (node == item) info.found_index = 0; @@ -1398,7 +1398,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) struct radix_tree_node *to_free = root->rnode; struct radix_tree_node *slot; - if (!radix_tree_is_indirect_ptr(to_free)) + if (!radix_tree_is_internal_node(to_free)) break; to_free = entry_to_node(to_free); @@ -1412,10 +1412,10 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; - if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) + if (!radix_tree_is_internal_node(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = NULL; /* @@ -1445,7 +1445,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 7b0bc1fa5919..a6e8099eaf4f 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -193,7 +193,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) + if (!radix_tree_is_internal_node(node)) return; verify_node(node, tag, !!root_tag_get(root, tag)); } @@ -222,7 +222,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { unsigned shift; struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { assert(maxindex == 0); return; } -- cgit v1.2.3-59-g8ed1b From af49a63e101eb62376cc1d6bd25b97eb8c691d54 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:33 -0700 Subject: radix-tree: change naming conventions in radix_tree_shrink Use the more standard 'node' and 'child' instead of 'to_free' and 'slot'. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b65c83036ca4..4b4a2a20a3d1 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1395,37 +1395,37 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) bool shrunk = false; for (;;) { - struct radix_tree_node *to_free = root->rnode; - struct radix_tree_node *slot; + struct radix_tree_node *node = root->rnode; + struct radix_tree_node *child; - if (!radix_tree_is_internal_node(to_free)) + if (!radix_tree_is_internal_node(node)) break; - to_free = entry_to_node(to_free); + node = entry_to_node(node); /* * The candidate node has more than one child, or its child * is not at the leftmost slot, or the child is a multiorder * entry, we cannot shrink. */ - if (to_free->count != 1) + if (node->count != 1) break; - slot = to_free->slots[0]; - if (!slot) + child = node->slots[0]; + if (!child) break; - if (!radix_tree_is_internal_node(slot) && to_free->shift) + if (!radix_tree_is_internal_node(child) && node->shift) break; - if (radix_tree_is_internal_node(slot)) - entry_to_node(slot)->parent = NULL; + if (radix_tree_is_internal_node(child)) + entry_to_node(child)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply * moving the node from one part of the tree to another: if it * was safe to dereference the old pointer to it - * (to_free->slots[0]), it will be safe to dereference the new + * (node->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - root->rnode = slot; + root->rnode = child; /* * We have a dilemma here. The node's slot[0] must not be @@ -1445,10 +1445,10 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (!radix_tree_is_internal_node(slot)) - to_free->slots[0] = RADIX_TREE_RETRY; + if (!radix_tree_is_internal_node(child)) + node->slots[0] = RADIX_TREE_RETRY; - radix_tree_node_free(to_free); + radix_tree_node_free(node); shrunk = true; } -- cgit v1.2.3-59-g8ed1b From 8c1244de00ef98f73e21eecc42d84b2742fbb4f9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:36 -0700 Subject: radix-tree: tidy up next_chunk Convert radix_tree_next_chunk to use 'child' instead of 'slot' as the name of the child node. Also use node_maxindex() where it makes sense. The 'rnode' variable was unnecessary; it doesn't overlap in usage with 'node', so we can just use 'node' the whole way through the function. Improve the testcase to start the walk from every index in the carefully constructed tree, and to accept any index within the range covered by the entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 53 +++++++------------ tools/testing/radix-tree/multiorder.c | 99 +++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 4b4a2a20a3d1..c42867a1769a 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -876,7 +876,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *rnode, *node; + struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) @@ -896,38 +896,29 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = radix_tree_load_root(root, &rnode, &maxindex); + shift = radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; + if (!child) + return NULL; - if (radix_tree_is_internal_node(rnode)) { - rnode = entry_to_node(rnode); - } else if (rnode) { + if (!radix_tree_is_internal_node(child)) { /* Single-slot tree */ iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; - __set_iter_shift(iter, shift); + __set_iter_shift(iter, 0); return (void **)&root->rnode; - } else - return NULL; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = index >> shift; - - node = rnode; - while (1) { - struct radix_tree_node *slot; - unsigned new_off = radix_tree_descend(node, &slot, offset); + } - if (new_off < offset) { - offset = new_off; - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); - index |= offset << shift; - } + do { + node = entry_to_node(child); + shift -= RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &child, offset); if ((flags & RADIX_TREE_ITER_TAGGED) ? - !tag_get(node, tag, offset) : !slot) { + !tag_get(node, tag, offset) : !child) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -945,29 +936,23 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (slot) break; } - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index &= ~node_maxindex(node); index += offset << shift; /* Overflow after ~0UL */ if (!index) return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; - slot = rcu_dereference_raw(node->slots[offset]); + child = rcu_dereference_raw(node->slots[offset]); } - if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) + if ((child == NULL) || (child == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_internal_node(slot)) - break; - - node = entry_to_node(slot); - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - } + } while (radix_tree_is_internal_node(child)); /* Update the iterator state */ - iter->index = index & ~((1 << shift) - 1); - iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); + iter->next_index = (index | node_maxindex(node)) + 1; __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index c061f4bd6c05..39d9b9568fe2 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -202,7 +202,7 @@ void multiorder_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; - int i, err; + int i, j, err; printf("Multiorder iteration test\n"); @@ -215,29 +215,21 @@ void multiorder_iteration(void) assert(!err); } - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_slot(slot, &tree, &iter, 1) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. - */ - i = 8; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; + for (j = 0; j < 256; j++) { + for (i = 0; i < NUM_ENTRIES; i++) + if (j <= (index[i] | ((1 << order[i]) - 1))) + break; + + radix_tree_for_each_slot(slot, &tree, &iter, j) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + int mask = (1 << order[i]) - 1; + + assert(iter.index >= (index[i] &~ mask)); + assert(iter.index <= (index[i] | mask)); + assert(iter.shift == shift); + i++; + } } item_kill_tree(&tree); @@ -249,7 +241,7 @@ void multiorder_tagged_iteration(void) struct radix_tree_iter iter; void **slot; unsigned long first = 0; - int i; + int i, j; printf("Multiorder tagged iteration test\n"); @@ -268,30 +260,49 @@ void multiorder_tagged_iteration(void) for (i = 0; i < TAG_ENTRIES; i++) assert(radix_tree_tag_set(&tree, tag_index[i], 1)); - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 1) { - assert(iter.index == tag_index[i]); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. - */ - i = 4; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, MT_NUM_ENTRIES, 1, 2); - i = 0; - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } first = 1; -- cgit v1.2.3-59-g8ed1b From a8e4da25d3c573a0c3cf2fb33e91ec5cad8d7f16 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:39 -0700 Subject: radix-tree: tidy up range_tag_if_tagged Convert radix_tree_range_tag_if_tagged to name the nodes parent, node and child instead of node & slot. Use parent->offset instead of playing games with 'upindex'. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c42867a1769a..1a82066165db 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1009,9 +1009,9 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int iftag, unsigned int settag) { - struct radix_tree_node *slot, *node = NULL; + struct radix_tree_node *parent, *node, *child; unsigned long maxindex; - unsigned int shift = radix_tree_load_root(root, &slot, &maxindex); + unsigned int shift = radix_tree_load_root(root, &child, &maxindex); unsigned long tagged = 0; unsigned long index = *first_indexp; @@ -1024,28 +1024,25 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (!radix_tree_is_internal_node(slot)) { + if (!radix_tree_is_internal_node(child)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; } - node = entry_to_node(slot); + node = entry_to_node(child); shift -= RADIX_TREE_MAP_SHIFT; for (;;) { - unsigned long upindex; - unsigned offset; - - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(node, &slot, offset); - if (!slot) + unsigned offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &child, offset); + if (!child) goto next; if (!tag_get(node, iftag, offset)) goto next; /* Sibling slots never have tags set on them */ - if (radix_tree_is_internal_node(slot)) { - node = entry_to_node(slot); + if (radix_tree_is_internal_node(child)) { + node = entry_to_node(child); shift -= RADIX_TREE_MAP_SHIFT; continue; } @@ -1054,20 +1051,18 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, tagged++; tag_set(node, settag, offset); - slot = node->parent; /* walk back up the path tagging interior nodes */ - upindex = index >> shift; - while (slot) { - upindex >>= RADIX_TREE_MAP_SHIFT; - offset = upindex & RADIX_TREE_MAP_MASK; - + parent = node; + for (;;) { + offset = parent->offset; + parent = parent->parent; + if (!parent) + break; /* stop if we find a node with the tag already set */ - if (tag_get(slot, settag, offset)) + if (tag_get(parent, settag, offset)) break; - tag_set(slot, settag, offset); - slot = slot->parent; + tag_set(parent, settag, offset); } - next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; -- cgit v1.2.3-59-g8ed1b From 89148aa40201def3fa552f9d07dd99740d880ab2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:42 -0700 Subject: radix-tree: tidy up __radix_tree_create() 1. Rename the existing variable 'slot' to 'child'. 2. Introduce a new variable called 'slot' which is the address of the slot we're dealing with. This lets us simplify the tree insertion, and removes the recalculation of 'slot' at the end of the function. 3. Using 'slot' in the sibling pointer insertion part makes the code more readable. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1a82066165db..9d9b4b9af4b6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -499,12 +499,13 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, unsigned order, struct radix_tree_node **nodep, void ***slotp) { - struct radix_tree_node *node = NULL, *slot; + struct radix_tree_node *node = NULL, *child; + void **slot = (void **)&root->rnode; unsigned long maxindex; - unsigned int shift, offset; + unsigned int shift, offset = 0; unsigned long max = index | ((1UL << order) - 1); - shift = radix_tree_load_root(root, &slot, &maxindex); + shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ if (max > maxindex) { @@ -512,51 +513,48 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (error < 0) return error; shift = error; - slot = root->rnode; + child = root->rnode; if (order == shift) shift += RADIX_TREE_MAP_SHIFT; } - offset = 0; /* uninitialised var warning */ while (shift > order) { shift -= RADIX_TREE_MAP_SHIFT; - if (slot == NULL) { + if (child == NULL) { /* Have to add a child node. */ - slot = radix_tree_node_alloc(root); - if (!slot) + child = radix_tree_node_alloc(root); + if (!child) return -ENOMEM; - slot->shift = shift; - slot->offset = offset; - slot->parent = node; - if (node) { - rcu_assign_pointer(node->slots[offset], - node_to_entry(slot)); + child->shift = shift; + child->offset = offset; + child->parent = node; + rcu_assign_pointer(*slot, node_to_entry(child)); + if (node) node->count++; - } else - rcu_assign_pointer(root->rnode, - node_to_entry(slot)); - } else if (!radix_tree_is_internal_node(slot)) + } else if (!radix_tree_is_internal_node(child)) break; /* Go a level down */ - node = entry_to_node(slot); + node = entry_to_node(child); offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(node, &slot, offset); + offset = radix_tree_descend(node, &child, offset); + slot = &node->slots[offset]; } #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ if (order > shift) { - int i, n = 1 << (order - shift); + unsigned i, n = 1 << (order - shift); offset = offset & ~(n - 1); - slot = node_to_entry(&node->slots[offset]); + slot = &node->slots[offset]; + child = node_to_entry(slot); for (i = 0; i < n; i++) { - if (node->slots[offset + i]) + if (slot[i]) return -EEXIST; } for (i = 1; i < n; i++) { - rcu_assign_pointer(node->slots[offset + i], slot); + rcu_assign_pointer(slot[i], child); node->count++; } } @@ -565,7 +563,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (nodep) *nodep = node; if (slotp) - *slotp = node ? node->slots + offset : (void **)&root->rnode; + *slotp = slot; return 0; } -- cgit v1.2.3-59-g8ed1b From d604c324524bf61c68182bb27db64656a78fe911 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:45 -0700 Subject: radix-tree: introduce radix_tree_replace_clear_tags() In addition to replacing the entry, we also clear all associated tags. This is really a one-off special for page_cache_tree_delete() which had far too much detailed knowledge about how the radix tree works. For efficiency, factor node_tag_clear() out of radix_tree_tag_clear() It can be used by radix_tree_delete_item() as well as radix_tree_replace_clear_tags(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 9 ++++-- lib/radix-tree.c | 76 ++++++++++++++++++++++++++++------------------ mm/filemap.c | 23 ++------------ 3 files changed, 56 insertions(+), 52 deletions(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index bad63105e37e..11c8e7cc3920 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -281,9 +281,12 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, - unsigned long first_index, unsigned int max_items); +struct radix_tree_node *radix_tree_replace_clear_tags( + struct radix_tree_root *root, + unsigned long index, void *entry); +unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, + void **results, unsigned long first_index, + unsigned int max_items); unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9d9b4b9af4b6..c7114d233b38 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -740,6 +740,26 @@ void *radix_tree_tag_set(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_set); +static void node_tag_clear(struct radix_tree_root *root, + struct radix_tree_node *node, + unsigned int tag, unsigned int offset) +{ + while (node) { + if (!tag_get(node, tag, offset)) + return; + tag_clear(node, tag, offset); + if (any_tag_set(node, tag)) + return; + + offset = node->offset; + node = node->parent; + } + + /* clear the root's tag bit */ + if (root_tag_get(root, tag)) + root_tag_clear(root, tag); +} + /** * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root @@ -776,28 +796,9 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, offset = radix_tree_descend(parent, &node, offset); } - if (node == NULL) - goto out; + if (node) + node_tag_clear(root, parent, tag, offset); - index >>= shift; - - while (parent) { - if (!tag_get(parent, tag, offset)) - goto out; - tag_clear(parent, tag, offset); - if (any_tag_set(parent, tag)) - goto out; - - index >>= RADIX_TREE_MAP_SHIFT; - offset = index & RADIX_TREE_MAP_MASK; - parent = parent->parent; - } - - /* clear the root's tag bit */ - if (root_tag_get(root, tag)) - root_tag_clear(root, tag); - -out: return node; } EXPORT_SYMBOL(radix_tree_tag_clear); @@ -1525,14 +1526,9 @@ void *radix_tree_delete_item(struct radix_tree_root *root, offset = get_slot_offset(node, slot); - /* - * Clear all tags associated with the item to be deleted. - * This way of doing it would be inefficient, but seldom is any set. - */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (tag_get(node, tag, offset)) - radix_tree_tag_clear(root, index, tag); - } + /* Clear all tags associated with the item to be deleted. */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + node_tag_clear(root, node, tag, offset); delete_sibling_entries(node, node_to_entry(slot), offset); node->slots[offset] = NULL; @@ -1559,6 +1555,28 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_delete); +struct radix_tree_node *radix_tree_replace_clear_tags( + struct radix_tree_root *root, + unsigned long index, void *entry) +{ + struct radix_tree_node *node; + void **slot; + + __radix_tree_lookup(root, index, &node, &slot); + + if (node) { + unsigned int tag, offset = get_slot_offset(node, slot); + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + node_tag_clear(root, node, tag, offset); + } else { + /* Clear root node tags */ + root->gfp_mask &= __GFP_BITS_MASK; + } + + radix_tree_replace_slot(slot, entry); + return node; +} + /** * radix_tree_tagged - test whether any items in the tree are tagged * @root: radix tree root diff --git a/mm/filemap.c b/mm/filemap.c index b418405903bc..9665b1d4f318 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -114,14 +114,11 @@ static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { struct radix_tree_node *node; - unsigned long index; - unsigned int offset; - unsigned int tag; - void **slot; VM_BUG_ON(!PageLocked(page)); - __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); + node = radix_tree_replace_clear_tags(&mapping->page_tree, page->index, + shadow); if (shadow) { mapping->nrexceptional++; @@ -135,23 +132,9 @@ static void page_cache_tree_delete(struct address_space *mapping, } mapping->nrpages--; - if (!node) { - /* Clear direct pointer tags in root node */ - mapping->page_tree.gfp_mask &= __GFP_BITS_MASK; - radix_tree_replace_slot(slot, shadow); + if (!node) return; - } - - /* Clear tree tags for the removed page */ - index = page->index; - offset = index & RADIX_TREE_MAP_MASK; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (test_bit(offset, node->tags[tag])) - radix_tree_tag_clear(&mapping->page_tree, index, tag); - } - /* Delete page, swap shadow entry */ - radix_tree_replace_slot(slot, shadow); workingset_node_pages_dec(node); if (shadow) workingset_node_shadows_inc(node); -- cgit v1.2.3-59-g8ed1b From 9e85d811196583126785a0405d0c879ae7a9eb2f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:48 -0700 Subject: radix-tree: make radix_tree_descend() more useful Now that the shift amount is stored in the node, radix_tree_descend() can calculate offset itself from index, which removes several lines of code from each of the tree walkers. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 78 +++++++++++++++++++------------------------------------- 1 file changed, 26 insertions(+), 52 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c7114d233b38..8b7d8459bb9d 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -94,9 +94,10 @@ static inline unsigned long get_slot_offset(struct radix_tree_node *parent, return slot - parent->slots; } -static unsigned radix_tree_descend(struct radix_tree_node *parent, - struct radix_tree_node **nodep, unsigned offset) +static unsigned int radix_tree_descend(struct radix_tree_node *parent, + struct radix_tree_node **nodep, unsigned long index) { + unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void **entry = rcu_dereference_raw(parent->slots[offset]); #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -536,8 +537,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, /* Go a level down */ node = entry_to_node(child); - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(node, &child, offset); + offset = radix_tree_descend(node, &child, index); slot = &node->slots[offset]; } @@ -625,13 +625,12 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, { struct radix_tree_node *node, *parent; unsigned long maxindex; - unsigned int shift; void **slot; restart: parent = NULL; slot = (void **)&root->rnode; - shift = radix_tree_load_root(root, &node, &maxindex); + radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; @@ -641,9 +640,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (node == RADIX_TREE_RETRY) goto restart; parent = entry_to_node(node); - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(parent, &node, offset); + offset = radix_tree_descend(parent, &node, index); slot = parent->slots + offset; } @@ -713,19 +710,15 @@ void *radix_tree_tag_set(struct radix_tree_root *root, { struct radix_tree_node *node, *parent; unsigned long maxindex; - unsigned int shift; - shift = radix_tree_load_root(root, &node, &maxindex); + radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); while (radix_tree_is_internal_node(node)) { unsigned offset; - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = entry_to_node(node); - offset = radix_tree_descend(parent, &node, offset); + offset = radix_tree_descend(parent, &node, index); BUG_ON(!node); if (!tag_get(parent, tag, offset)) @@ -779,21 +772,17 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, { struct radix_tree_node *node, *parent; unsigned long maxindex; - unsigned int shift; int uninitialized_var(offset); - shift = radix_tree_load_root(root, &node, &maxindex); + radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; parent = NULL; while (radix_tree_is_internal_node(node)) { - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = entry_to_node(node); - offset = radix_tree_descend(parent, &node, offset); + offset = radix_tree_descend(parent, &node, index); } if (node) @@ -823,25 +812,21 @@ int radix_tree_tag_get(struct radix_tree_root *root, { struct radix_tree_node *node, *parent; unsigned long maxindex; - unsigned int shift; if (!root_tag_get(root, tag)) return 0; - shift = radix_tree_load_root(root, &node, &maxindex); + radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return 0; if (node == NULL) return 0; while (radix_tree_is_internal_node(node)) { - int offset; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; + unsigned offset; parent = entry_to_node(node); - offset = radix_tree_descend(parent, &node, offset); + offset = radix_tree_descend(parent, &node, index); if (!node) return 0; @@ -874,7 +859,7 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter, void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { - unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; + unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; @@ -895,7 +880,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = radix_tree_load_root(root, &child, &maxindex); + radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; if (!child) @@ -912,9 +897,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, do { node = entry_to_node(child); - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(node, &child, offset); + offset = radix_tree_descend(node, &child, index); if ((flags & RADIX_TREE_ITER_TAGGED) ? !tag_get(node, tag, offset) : !child) { @@ -936,7 +919,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, break; } index &= ~node_maxindex(node); - index += offset << shift; + index += offset << node->shift; /* Overflow after ~0UL */ if (!index) return NULL; @@ -952,7 +935,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, /* Update the iterator state */ iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); iter->next_index = (index | node_maxindex(node)) + 1; - __set_iter_shift(iter, shift); + __set_iter_shift(iter, node->shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ if (flags & RADIX_TREE_ITER_TAGGED) { @@ -1010,10 +993,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, { struct radix_tree_node *parent, *node, *child; unsigned long maxindex; - unsigned int shift = radix_tree_load_root(root, &child, &maxindex); unsigned long tagged = 0; unsigned long index = *first_indexp; + radix_tree_load_root(root, &child, &maxindex); last_index = min(last_index, maxindex); if (index > last_index) return 0; @@ -1030,11 +1013,9 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, } node = entry_to_node(child); - shift -= RADIX_TREE_MAP_SHIFT; for (;;) { - unsigned offset = (index >> shift) & RADIX_TREE_MAP_MASK; - offset = radix_tree_descend(node, &child, offset); + unsigned offset = radix_tree_descend(node, &child, index); if (!child) goto next; if (!tag_get(node, iftag, offset)) @@ -1042,7 +1023,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, /* Sibling slots never have tags set on them */ if (radix_tree_is_internal_node(child)) { node = entry_to_node(child); - shift -= RADIX_TREE_MAP_SHIFT; continue; } @@ -1063,12 +1043,12 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, tag_set(parent, settag, offset); } next: - /* Go to next item at level determined by 'shift' */ - index = ((index >> shift) + 1) << shift; + /* Go to next entry in node */ + index = ((index >> node->shift) + 1) << node->shift; /* Overflow can happen when last_index is ~0UL... */ if (index > last_index || !index) break; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; while (offset == 0) { /* * We've fully scanned this node. Go up. Because @@ -1076,8 +1056,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, * we do below cannot wander astray. */ node = node->parent; - shift += RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = (index >> node->shift) & RADIX_TREE_MAP_MASK; } if (is_sibling_entry(node, node->slots[offset])) goto next; @@ -1275,13 +1254,10 @@ struct locate_info { static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned long index, struct locate_info *info) { - unsigned int shift; unsigned long i; - shift = slot->shift + RADIX_TREE_MAP_SHIFT; - do { - shift -= RADIX_TREE_MAP_SHIFT; + unsigned int shift = slot->shift; for (i = (index >> shift) & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; @@ -1304,9 +1280,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, slot = node; break; } - if (i == RADIX_TREE_MAP_SIZE) - break; - } while (shift); + } while (i < RADIX_TREE_MAP_SIZE); out: if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) -- cgit v1.2.3-59-g8ed1b From fff7fb0b2d908dec779783d8eaf3d7725230f75e Mon Sep 17 00:00:00 2001 From: Zhaoxiu Zeng Date: Fri, 20 May 2016 17:03:57 -0700 Subject: lib/GCD.c: use binary GCD algorithm instead of Euclidean The binary GCD algorithm is based on the following facts: 1. If a and b are all evens, then gcd(a,b) = 2 * gcd(a/2, b/2) 2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b) 3. If a and b are all odds, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b) Even on x86 machines with reasonable division hardware, the binary algorithm runs about 25% faster (80% the execution time) than the division-based Euclidian algorithm. On platforms like Alpha and ARMv6 where division is a function call to emulation code, it's even more significant. There are two variants of the code here, depending on whether a fast __ffs (find least significant set bit) instruction is available. This allows the unpredictable branches in the bit-at-a-time shifting loop to be eliminated. If fast __ffs is not available, the "even/odd" GCD variant is used. I use the following code to benchmark: #include #include #include #include #include #include #define swap(a, b) \ do { \ a ^= b; \ b ^= a; \ a ^= b; \ } while (0) unsigned long gcd0(unsigned long a, unsigned long b) { unsigned long r; if (a < b) { swap(a, b); } if (b == 0) return a; while ((r = a % b) != 0) { a = b; b = r; } return b; } unsigned long gcd1(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); for (;;) { a >>= __builtin_ctzl(a); if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd2(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; for (;;) { while (!(a & r)) a >>= 1; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } unsigned long gcd3(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; b >>= __builtin_ctzl(b); if (b == 1) return r & -r; for (;;) { a >>= __builtin_ctzl(a); if (a == 1) return r & -r; if (a == b) return a << __builtin_ctzl(r); if (a < b) swap(a, b); a -= b; } } unsigned long gcd4(unsigned long a, unsigned long b) { unsigned long r = a | b; if (!a || !b) return r; r &= -r; while (!(b & r)) b >>= 1; if (b == r) return r; for (;;) { while (!(a & r)) a >>= 1; if (a == r) return r; if (a == b) return a; if (a < b) swap(a, b); a -= b; a >>= 1; if (a & r) a += b; a >>= 1; } } static unsigned long (*gcd_func[])(unsigned long a, unsigned long b) = { gcd0, gcd1, gcd2, gcd3, gcd4, }; #define TEST_ENTRIES (sizeof(gcd_func) / sizeof(gcd_func[0])) #if defined(__x86_64__) #define rdtscll(val) do { \ unsigned long __a,__d; \ __asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \ (val) = ((unsigned long long)__a) | (((unsigned long long)__d)<<32); \ } while(0) static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { unsigned long long start, end; unsigned long long ret; unsigned long gcd_res; rdtscll(start); gcd_res = gcd(a, b); rdtscll(end); if (end >= start) ret = end - start; else ret = ~0ULL - start + 1 + end; *res = gcd_res; return ret; } #else static inline struct timespec read_time(void) { struct timespec time; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time); return time; } static inline unsigned long long diff_time(struct timespec start, struct timespec end) { struct timespec temp; if ((end.tv_nsec - start.tv_nsec) < 0) { temp.tv_sec = end.tv_sec - start.tv_sec - 1; temp.tv_nsec = 1000000000ULL + end.tv_nsec - start.tv_nsec; } else { temp.tv_sec = end.tv_sec - start.tv_sec; temp.tv_nsec = end.tv_nsec - start.tv_nsec; } return temp.tv_sec * 1000000000ULL + temp.tv_nsec; } static unsigned long long benchmark_gcd_func(unsigned long (*gcd)(unsigned long, unsigned long), unsigned long a, unsigned long b, unsigned long *res) { struct timespec start, end; unsigned long gcd_res; start = read_time(); gcd_res = gcd(a, b); end = read_time(); *res = gcd_res; return diff_time(start, end); } #endif static inline unsigned long get_rand() { if (sizeof(long) == 8) return (unsigned long)rand() << 32 | rand(); else return rand(); } int main(int argc, char **argv) { unsigned int seed = time(0); int loops = 100; int repeats = 1000; unsigned long (*res)[TEST_ENTRIES]; unsigned long long elapsed[TEST_ENTRIES]; int i, j, k; for (;;) { int opt = getopt(argc, argv, "n:r:s:"); /* End condition always first */ if (opt == -1) break; switch (opt) { case 'n': loops = atoi(optarg); break; case 'r': repeats = atoi(optarg); break; case 's': seed = strtoul(optarg, NULL, 10); break; default: /* You won't actually get here. */ break; } } res = malloc(sizeof(unsigned long) * TEST_ENTRIES * loops); memset(elapsed, 0, sizeof(elapsed)); srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); /* Do we have args? */ unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); unsigned long long min_elapsed[TEST_ENTRIES]; for (k = 0; k < repeats; k++) { for (i = 0; i < TEST_ENTRIES; i++) { unsigned long long tmp = benchmark_gcd_func(gcd_func[i], a, b, &res[j][i]); if (k == 0 || min_elapsed[i] > tmp) min_elapsed[i] = tmp; } } for (i = 0; i < TEST_ENTRIES; i++) elapsed[i] += min_elapsed[i]; } for (i = 0; i < TEST_ENTRIES; i++) printf("gcd%d: elapsed %llu\n", i, elapsed[i]); k = 0; srand(seed); for (j = 0; j < loops; j++) { unsigned long a = get_rand(); unsigned long b = argc > optind ? strtoul(argv[optind], NULL, 10) : get_rand(); for (i = 1; i < TEST_ENTRIES; i++) { if (res[j][i] != res[j][0]) break; } if (i < TEST_ENTRIES) { if (k == 0) { k = 1; fprintf(stderr, "Error:\n"); } fprintf(stderr, "gcd(%lu, %lu): ", a, b); for (i = 0; i < TEST_ENTRIES; i++) fprintf(stderr, "%ld%s", res[j][i], i < TEST_ENTRIES - 1 ? ", " : "\n"); } } if (k == 0) fprintf(stderr, "PASS\n"); free(res); return 0; } Compiled with "-O2", on "VirtualBox 4.4.0-22-generic #38-Ubuntu x86_64" got: zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 10174 gcd1: elapsed 2120 gcd2: elapsed 2902 gcd3: elapsed 2039 gcd4: elapsed 2812 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9309 gcd1: elapsed 2280 gcd2: elapsed 2822 gcd3: elapsed 2217 gcd4: elapsed 2710 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9589 gcd1: elapsed 2098 gcd2: elapsed 2815 gcd3: elapsed 2030 gcd4: elapsed 2718 PASS zhaoxiuzeng@zhaoxiuzeng-VirtualBox:~/develop$ ./gcd -r 500000 -n 10 gcd0: elapsed 9914 gcd1: elapsed 2309 gcd2: elapsed 2779 gcd3: elapsed 2228 gcd4: elapsed 2709 PASS [akpm@linux-foundation.org: avoid #defining a CONFIG_ variable] Signed-off-by: Zhaoxiu Zeng Signed-off-by: George Spelvin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 ++ arch/alpha/Kconfig | 1 + arch/arc/Kconfig | 1 + arch/arm/mm/Kconfig | 3 ++ arch/h8300/Kconfig | 1 + arch/m32r/Kconfig | 1 + arch/m68k/Kconfig.cpu | 11 ++++++ arch/metag/Kconfig | 1 + arch/microblaze/Kconfig | 1 + arch/mips/include/asm/cpu-features.h | 10 +++++ arch/nios2/Kconfig | 1 + arch/openrisc/Kconfig | 1 + arch/parisc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/score/Kconfig | 1 + arch/sh/Kconfig | 1 + arch/sparc/Kconfig | 1 + lib/gcd.c | 77 +++++++++++++++++++++++++++++++----- 18 files changed, 107 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index 8f84fd268dee..b16e74e4b5af 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -647,4 +647,7 @@ config COMPAT_OLD_SIGACTION config ARCH_NO_COHERENT_DMA_MMAP bool +config CPU_NO_EFFICIENT_FFS + def_bool n + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index fe99f894e57d..7f312d80b43b 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -26,6 +26,7 @@ config ALPHA select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8894f7e7e3de..0dcbacfdea4b 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -107,6 +107,7 @@ choice config ISA_ARCOMPACT bool "ARCompact ISA" + select CPU_NO_EFFICIENT_FFS help The original ARC ISA of ARC600/700 cores diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 55347662e5ed..cb569b65a54d 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -421,18 +421,21 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v5 bool diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 986ea84caaed..aa232de2d4bc 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -20,6 +20,7 @@ config H8300 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_ARCH_KGDB + select CPU_NO_EFFICIENT_FFS config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index c82b29253991..3cc8498fe0fe 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -17,6 +17,7 @@ config M32R select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select CPU_NO_EFFICIENT_FFS config SBUS bool diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index c1beb5ae181f..8ace920ca24a 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -40,6 +40,7 @@ config M68000 select CPU_HAS_NO_MULDIV64 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM + select CPU_NO_EFFICIENT_FFS help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as @@ -51,6 +52,7 @@ config MCPU32 bool select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_UNALIGNED + select CPU_NO_EFFICIENT_FFS help The Freescale (was then Motorola) CPU32 is a CPU core that is based on the 68020 processor. For the most part it is used in @@ -130,6 +132,7 @@ config M5206 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206 processor support. @@ -138,6 +141,7 @@ config M5206e depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206e processor support. @@ -163,6 +167,7 @@ config M5249 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5249 processor support. @@ -171,6 +176,7 @@ config M525x depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale (Motorola) Coldfire 5251/5253 processor support. @@ -189,6 +195,7 @@ config M5272 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5272 processor support. @@ -217,6 +224,7 @@ config M5307 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5307 processor support. @@ -242,6 +250,7 @@ config M5407 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5407 processor support. @@ -251,6 +260,7 @@ config M547x select MMU_COLDFIRE if MMU select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support. @@ -260,6 +270,7 @@ config M548x select M54xx select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support. diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index e47a08d72819..5b7a45d99cfb 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -30,6 +30,7 @@ config METAG select OF select OF_EARLY_FLATTREE select SPARSE_IRQ + select CPU_NO_EFFICIENT_FFS config STACKTRACE_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3d793b55f60c..f17c3a4fb697 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -32,6 +32,7 @@ config MICROBLAZE select OF_EARLY_FLATTREE select TRACING_SUPPORT select VIRT_TO_BUS + select CPU_NO_EFFICIENT_FFS config SWAP def_bool n diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index e6f19fc61bf2..e961c8a7ea66 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -204,6 +204,16 @@ #endif #endif +/* __builtin_constant_p(cpu_has_mips_r) && cpu_has_mips_r */ +#if !((defined(cpu_has_mips32r1) && cpu_has_mips32r1) || \ + (defined(cpu_has_mips32r2) && cpu_has_mips32r2) || \ + (defined(cpu_has_mips32r6) && cpu_has_mips32r6) || \ + (defined(cpu_has_mips64r1) && cpu_has_mips64r1) || \ + (defined(cpu_has_mips64r2) && cpu_has_mips64r2) || \ + (defined(cpu_has_mips64r6) && cpu_has_mips64r6)) +#define CPU_NO_EFFICIENT_FFS 1 +#endif + #ifndef cpu_has_mips_1 # define cpu_has_mips_1 (!cpu_has_mips_r6) #endif diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 87ca653eb5f3..51a56c8b04b4 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -15,6 +15,7 @@ config NIOS2 select SOC_BUS select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT + select CPU_NO_EFFICIENT_FFS config GENERIC_CSUM def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e118c02cc79a..142cb057c41b 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -25,6 +25,7 @@ config OPENRISC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC + select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 88cfaa8af78e..3d498a676551 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select ARCH_NO_COHERENT_DMA_MMAP + select CPU_NO_EFFICIENT_FFS help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c3c43d9d1b5..a8c259059adf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -123,6 +123,7 @@ config S390 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 366e1b599a7b..507d63181389 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -14,6 +14,7 @@ config SCORE select VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS + select CPU_NO_EFFICIENT_FFS choice prompt "System type" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f6254341c065..e803a836cb7c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -20,6 +20,7 @@ config SUPERH select PERF_USE_VMALLOC select HAVE_DEBUG_KMEMLEAK select HAVE_KERNEL_GZIP + select CPU_NO_EFFICIENT_FFS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_XZ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 1012f7ffcdf5..546293d9e6c5 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select CPU_NO_EFFICIENT_FFS config SPARC32 def_bool !64BIT diff --git a/lib/gcd.c b/lib/gcd.c index 3657f129d7b8..135ee6407a5e 100644 --- a/lib/gcd.c +++ b/lib/gcd.c @@ -2,20 +2,77 @@ #include #include -/* Greatest common divisor */ +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears in a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ unsigned long gcd(unsigned long a, unsigned long b) { - unsigned long r; + unsigned long r = a | b; + + if (!a || !b) + return r; - if (a < b) - swap(a, b); + b >>= __ffs(b); + if (b == 1) + return r & -r; - if (!b) - return a; - while ((r = a % b) != 0) { - a = b; - b = r; + for (;;) { + a >>= __ffs(a); + if (a == 1) + return r & -r; + if (a == b) + return a << __ffs(r); + + if (a < b) + swap(a, b); + a -= b; } - return b; } + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(b & r)) + b >>= 1; + if (b == r) + return r; + + for (;;) { + while (!(a & r)) + a >>= 1; + if (a == r) + return r; + if (a == b) + return a; + + if (a < b) + swap(a, b); + a -= b; + a >>= 1; + if (a & r) + a += b; + a >>= 1; + } +} + +#endif + EXPORT_SYMBOL_GPL(gcd); -- cgit v1.2.3-59-g8ed1b