aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig10
-rw-r--r--lib/Kconfig.debug82
-rw-r--r--lib/Kconfig.kasan4
-rw-r--r--lib/Kconfig.kgdb2
-rw-r--r--lib/Kconfig.ubsan11
-rw-r--r--lib/Makefile13
-rw-r--r--lib/atomic64.c32
-rw-r--r--lib/atomic64_test.c34
-rw-r--r--lib/bitmap.c2
-rw-r--r--lib/chacha20.c79
-rw-r--r--lib/cpu-notifier-error-inject.c46
-rw-r--r--lib/crc32.c16
-rw-r--r--lib/debugobjects.c92
-rw-r--r--lib/digsig.c16
-rw-r--r--lib/dma-debug.c92
-rw-r--r--lib/dma-noop.c9
-rw-r--r--lib/dynamic_debug.c7
-rw-r--r--lib/earlycpio.c5
-rw-r--r--lib/gcd.c77
-rw-r--r--lib/hweight.c4
-rw-r--r--lib/iommu-helper.c3
-rw-r--r--lib/iov_iter.c165
-rw-r--r--lib/irq_poll.c26
-rw-r--r--lib/mpi/mpicoder.c247
-rw-r--r--lib/nmi_backtrace.c89
-rw-r--r--lib/nodemask.c30
-rw-r--r--lib/percpu_counter.c6
-rw-r--r--lib/radix-tree.c985
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile8
-rw-r--r--lib/raid6/algos.c18
-rw-r--r--lib/raid6/avx512.c569
-rw-r--r--lib/raid6/recov_avx512.c388
-rw-r--r--lib/raid6/recov_s390xc.c116
-rw-r--r--lib/raid6/s390vx.uc168
-rw-r--r--lib/raid6/test/Makefile5
-rw-r--r--lib/raid6/test/test.c7
-rw-r--r--lib/raid6/x86.h10
-rw-r--r--lib/random32.c5
-rw-r--r--lib/ratelimit.c10
-rw-r--r--lib/rbtree.c26
-rw-r--r--lib/rhashtable.c320
-rw-r--r--lib/sg_pool.c172
-rw-r--r--lib/stackdepot.c1
-rw-r--r--lib/string_helpers.c92
-rw-r--r--lib/strncpy_from_user.c10
-rw-r--r--lib/strnlen_user.c7
-rw-r--r--lib/swiotlb.c13
-rw-r--r--lib/syscall.c15
-rw-r--r--lib/test_bpf.c1
-rw-r--r--lib/test_hash.c256
-rw-r--r--lib/test_kasan.c69
-rw-r--r--lib/test_rhashtable.c2
-rw-r--r--lib/test_uuid.c133
-rw-r--r--lib/ubsan.c2
-rw-r--r--lib/ucs2_string.c2
-rw-r--r--lib/usercopy.c9
-rw-r--r--lib/uuid.c91
-rw-r--r--lib/vsprintf.c21
-rw-r--r--lib/win_minmax.c98
60 files changed, 3681 insertions, 1148 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 3cca1222578e..d79909dc01ec 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -362,6 +362,9 @@ config INTERVAL_TREE
for more information.
+config RADIX_TREE_MULTIORDER
+ bool
+
config ASSOCIATIVE_ARRAY
bool
help
@@ -523,6 +526,13 @@ config SG_SPLIT
a scatterlist. This should be selected by a driver or an API which
whishes to split a scatterlist amongst multiple DMA channels.
+config SG_POOL
+ def_bool n
+ help
+ Provides a helper to allocate chained scatterlists. This should be
+ selected by a driver or an API which whishes to allocate chained
+ scatterlist.
+
#
# sg chaining option
#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c0ae0fc125db..39d07e754822 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -244,6 +244,7 @@ config PAGE_OWNER
depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
select DEBUG_FS
select STACKTRACE
+ select STACKDEPOT
select PAGE_EXTENSION
help
This keeps track of what call chain is the owner of a page, may
@@ -257,6 +258,7 @@ config PAGE_OWNER
config DEBUG_FS
bool "Debug Filesystem"
+ select SRCU
help
debugfs is a virtual file system that kernel developers use to put
debugging files into. Enable this option to be able to read and
@@ -707,6 +709,8 @@ config KCOV
bool "Code coverage for fuzzing"
depends on ARCH_HAS_KCOV
select DEBUG_FS
+ select GCC_PLUGINS if !COMPILE_TEST
+ select GCC_PLUGIN_SANCOV if !COMPILE_TEST
help
KCOV exposes kernel code coverage information in a form suitable
for coverage-guided fuzzing (randomized testing).
@@ -717,6 +721,17 @@ config KCOV
For more details, see Documentation/kcov.txt.
+config KCOV_INSTRUMENT_ALL
+ bool "Instrument all code by default"
+ depends on KCOV
+ default y if KCOV
+ help
+ If you are doing generic system call fuzzing (like e.g. syzkaller),
+ then you will want to instrument the whole kernel and you should
+ say y here. If you are doing more targeted fuzzing (like e.g.
+ filesystem fuzzing with AFL) then you will want to enable coverage
+ for more specific subsets of files, and should say n here.
+
config DEBUG_SHIRQ
bool "Debug shared IRQ handlers"
depends on DEBUG_KERNEL
@@ -806,7 +821,7 @@ config DETECT_HUNG_TASK
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
- uninterruptible "D" state indefinitiley.
+ uninterruptible "D" state indefinitely.
When a hung task is detected, the kernel will print the
current stack trace (which you should report), but the
@@ -1306,22 +1321,6 @@ config RCU_PERF_TEST
Say M if you want the RCU performance tests to build as a module.
Say N if you are unsure.
-config RCU_PERF_TEST_RUNNABLE
- bool "performance tests for RCU runnable by default"
- depends on RCU_PERF_TEST = y
- default n
- help
- This option provides a way to build the RCU performance tests
- directly into the kernel without them starting up at boot time.
- You can use /sys/module to manually override this setting.
- This /proc file is available only when the RCU performance
- tests have been built into the kernel.
-
- Say Y here if you want the RCU performance tests to start during
- boot (you probably don't).
- Say N here if you want the RCU performance tests to start only
- after being manually enabled via /sys/module.
-
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL
@@ -1339,23 +1338,6 @@ config RCU_TORTURE_TEST
Say M if you want the RCU torture tests to build as a module.
Say N if you are unsure.
-config RCU_TORTURE_TEST_RUNNABLE
- bool "torture tests for RCU runnable by default"
- depends on RCU_TORTURE_TEST = y
- default n
- help
- This option provides a way to build the RCU torture tests
- directly into the kernel without them starting up at boot
- time. You can use /proc/sys/kernel/rcutorture_runnable
- to manually override this setting. This /proc file is
- available only when the RCU torture tests have been built
- into the kernel.
-
- Say Y here if you want the RCU torture tests to start during
- boot (you probably don't).
- Say N here if you want the RCU torture tests to start only
- after being manually enabled via /proc.
-
config RCU_TORTURE_TEST_SLOW_PREINIT
bool "Slow down RCU grace-period pre-initialization to expose races"
depends on RCU_TORTURE_TEST
@@ -1704,24 +1686,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
-config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- bool
-
-config DEBUG_STRICT_USER_COPY_CHECKS
- bool "Strict user copy size checks"
- depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
- help
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time failures.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, say N.
-
source kernel/trace/Kconfig
menu "Runtime Testing"
@@ -1840,6 +1804,9 @@ config TEST_BITMAP
If unsure, say N.
+config TEST_UUID
+ tristate "Test functions located in the uuid module at runtime"
+
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
default n
@@ -1848,6 +1815,17 @@ config TEST_RHASHTABLE
If unsure, say N.
+config TEST_HASH
+ tristate "Perform selftest on hash functions"
+ default n
+ help
+ Enable this option to test the kernel's integer (<linux/hash,h>)
+ and string (<linux/stringhash.h>) hash functions on boot
+ (or module load).
+
+ This is intended to help people writing architecture-specific
+ optimized versions. If unsure, say N.
+
endmenu # runtime tests
config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 67d8c6838ba9..bd38aab05929 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,9 +5,9 @@ if HAVE_ARCH_KASAN
config KASAN
bool "KASan: runtime memory debugger"
- depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
+ depends on SLUB || (SLAB && !DEBUG_SLAB)
select CONSTRUCTORS
- select STACKDEPOT if SLAB
+ select STACKDEPOT
help
Enables kernel address sanitizer - runtime memory debugger,
designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index c635a107a7de..533f912638ed 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -22,7 +22,7 @@ config KGDB_SERIAL_CONSOLE
tristate "KGDB: use kgdb over the serial console"
select CONSOLE_POLL
select MAGIC_SYSRQ
- depends on TTY
+ depends on TTY && HW_CONSOLE
default y
help
Share a serial console with kgdb. Sysrq-g must be used
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af9a84a..bc6e651df68c 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
config ARCH_HAS_UBSAN_SANITIZE_ALL
bool
+config ARCH_WANTS_UBSAN_NO_NULL
+ def_bool n
+
config UBSAN
bool "Undefined behaviour sanity checker"
help
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT
This option enables detection of unaligned memory accesses.
Enabling this option on architectures that support unaligned
accesses may produce a lot of false positives.
+
+config UBSAN_NULL
+ bool "Enable checking of null pointers"
+ depends on UBSAN
+ default y if !ARCH_WANTS_UBSAN_NO_NULL
+ help
+ This option enables detection of memory accesses via a
+ null pointer.
diff --git a/lib/Makefile b/lib/Makefile
index a65e9a861535..df747e5eeb7a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,19 +15,15 @@ KCOV_INSTRUMENT_rbtree.o := n
KCOV_INSTRUMENT_list_debug.o := n
KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
idr.o int_sqrt.o extable.o \
- sha1.o md5.o irq_regs.o argv_split.o \
+ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o seq_buf.o nmi_backtrace.o
+ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
-obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_HAS_DMA) += dma-noop.o
@@ -48,6 +44,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
@@ -57,6 +54,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
+obj-$(CONFIG_TEST_UUID) += test_uuid.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
@@ -72,8 +70,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
@@ -178,6 +174,7 @@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o
+obj-$(CONFIG_SG_POOL) += sg_pool.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 2886ebac6567..53c2d5edc826 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
} \
EXPORT_SYMBOL(atomic64_##op##_return);
+#define ATOMIC64_FETCH_OP(op, c_op) \
+long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = v->counter; \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+} \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
#define ATOMIC64_OPS(op, c_op) \
ATOMIC64_OP(op, c_op) \
- ATOMIC64_OP_RETURN(op, c_op)
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
ATOMIC64_OPS(add, +=)
ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 123481814320..dbb369145dda 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -53,11 +53,25 @@ do { \
BUG_ON(atomic##bit##_read(&v) != r); \
} while (0)
+#define TEST_FETCH(bit, op, c_op, val) \
+do { \
+ atomic##bit##_set(&v, v0); \
+ r = v0; \
+ r c_op val; \
+ BUG_ON(atomic##bit##_##op(val, &v) != v0); \
+ BUG_ON(atomic##bit##_read(&v) != r); \
+} while (0)
+
#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
do { \
FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
} while (0)
+#define FETCH_FAMILY_TEST(bit, op, c_op, val) \
+do { \
+ FAMILY_TEST(TEST_FETCH, bit, op, c_op, val); \
+} while (0)
+
#define TEST_ARGS(bit, op, init, ret, expect, args...) \
do { \
atomic##bit##_set(&v, init); \
@@ -114,6 +128,16 @@ static __init void test_atomic(void)
RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(, sub_return, -=, -one);
+ FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
+ FETCH_FAMILY_TEST(, fetch_add, +=, -one);
+ FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
+ FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
+
+ FETCH_FAMILY_TEST(, fetch_or, |=, v1);
+ FETCH_FAMILY_TEST(, fetch_and, &=, v1);
+ FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
+ FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
+
INC_RETURN_FAMILY_TEST(, v0);
DEC_RETURN_FAMILY_TEST(, v0);
@@ -154,6 +178,16 @@ static __init void test_atomic64(void)
RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(64, sub_return, -=, -one);
+ FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
+ FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
+ FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
+ FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
+
+ FETCH_FAMILY_TEST(64, fetch_or, |=, v1);
+ FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
+ FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
+ FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
+
INIT(v0);
atomic64_inc(&v);
r += one;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c66da508cbf7..eca88087fa8a 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -14,9 +14,9 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/string.h>
+#include <linux/uaccess.h>
#include <asm/page.h>
-#include <asm/uaccess.h>
/*
* bitmaps provide an array of bits, implemented using an an
diff --git a/lib/chacha20.c b/lib/chacha20.c
new file mode 100644
index 000000000000..250ceed9ec9a
--- /dev/null
+++ b/lib/chacha20.c
@@ -0,0 +1,79 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha20.h>
+
+static inline u32 rotl32(u32 v, u8 n)
+{
+ return (v << n) | (v >> (sizeof(v) * 8 - n));
+}
+
+extern void chacha20_block(u32 *state, void *stream)
+{
+ u32 x[16], *out = stream;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(x); i++)
+ x[i] = state[i];
+
+ for (i = 0; i < 20; i += 2) {
+ x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16);
+ x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16);
+ x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16);
+ x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16);
+
+ x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12);
+ x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12);
+ x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12);
+ x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12);
+
+ x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8);
+ x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8);
+ x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8);
+ x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8);
+
+ x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7);
+ x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7);
+ x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7);
+ x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7);
+
+ x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16);
+ x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16);
+ x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16);
+ x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16);
+
+ x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12);
+ x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12);
+ x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12);
+ x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12);
+
+ x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8);
+ x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8);
+ x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8);
+ x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8);
+
+ x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7);
+ x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7);
+ x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7);
+ x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(x); i++)
+ out[i] = cpu_to_le32(x[i] + state[i]);
+
+ state[12]++;
+}
+EXPORT_SYMBOL(chacha20_block);
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
index 707ca24f7b18..0e2c9a1e958a 100644
--- a/lib/cpu-notifier-error-inject.c
+++ b/lib/cpu-notifier-error-inject.c
@@ -8,16 +8,47 @@ static int priority;
module_param(priority, int, 0);
MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+#define UP_PREPARE 0
+#define UP_PREPARE_FROZEN 0
+#define DOWN_PREPARE 0
+#define DOWN_PREPARE_FROZEN 0
+
static struct notifier_err_inject cpu_notifier_err_inject = {
.actions = {
- { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE_FROZEN) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE) },
- { NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE_FROZEN) },
+ { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
+ { NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
+ { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
+ { NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
{}
}
};
+static int notf_err_handle(struct notifier_err_inject_action *action)
+{
+ int ret;
+
+ ret = action->error;
+ if (ret)
+ pr_info("Injecting error (%d) to %s\n", ret, action->name);
+ return ret;
+}
+
+static int notf_err_inj_up_prepare(unsigned int cpu)
+{
+ if (!cpuhp_tasks_frozen)
+ return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
+ else
+ return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
+}
+
+static int notf_err_inj_dead(unsigned int cpu)
+{
+ if (!cpuhp_tasks_frozen)
+ return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
+ else
+ return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
+}
+
static struct dentry *dir;
static int err_inject_init(void)
@@ -29,7 +60,10 @@ static int err_inject_init(void)
if (IS_ERR(dir))
return PTR_ERR(dir);
- err = register_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+ err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
+ "cpu-err-notif:prepare",
+ notf_err_inj_up_prepare,
+ notf_err_inj_dead);
if (err)
debugfs_remove_recursive(dir);
@@ -38,7 +72,7 @@ static int err_inject_init(void)
static void err_inject_exit(void)
{
- unregister_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+ cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
debugfs_remove_recursive(dir);
}
diff --git a/lib/crc32.c b/lib/crc32.c
index 9a907d489d95..7fbd1a112b9d 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -979,7 +979,6 @@ static int __init crc32c_test(void)
int i;
int errors = 0;
int bytes = 0;
- struct timespec start, stop;
u64 nsec;
unsigned long flags;
@@ -999,20 +998,17 @@ static int __init crc32c_test(void)
local_irq_save(flags);
local_irq_disable();
- getnstimeofday(&start);
+ nsec = ktime_get_ns();
for (i = 0; i < 100; i++) {
if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
test[i].start, test[i].length))
errors++;
}
- getnstimeofday(&stop);
+ nsec = ktime_get_ns() - nsec;
local_irq_restore(flags);
local_irq_enable();
- nsec = stop.tv_nsec - start.tv_nsec +
- 1000000000 * (stop.tv_sec - start.tv_sec);
-
pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
if (errors)
@@ -1065,7 +1061,6 @@ static int __init crc32_test(void)
int i;
int errors = 0;
int bytes = 0;
- struct timespec start, stop;
u64 nsec;
unsigned long flags;
@@ -1088,7 +1083,7 @@ static int __init crc32_test(void)
local_irq_save(flags);
local_irq_disable();
- getnstimeofday(&start);
+ nsec = ktime_get_ns();
for (i = 0; i < 100; i++) {
if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
test[i].start, test[i].length))
@@ -1098,14 +1093,11 @@ static int __init crc32_test(void)
test[i].start, test[i].length))
errors++;
}
- getnstimeofday(&stop);
+ nsec = ktime_get_ns() - nsec;
local_irq_restore(flags);
local_irq_enable();
- nsec = stop.tv_nsec - start.tv_nsec +
- 1000000000 * (stop.tv_sec - start.tv_sec);
-
pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
CRC_LE_BITS, CRC_BE_BITS);
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 519b5a10fd70..a8e12601eb37 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -269,16 +269,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
* Try to repair the damage, so we have a better chance to get useful
* debug output.
*/
-static int
-debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
+static bool
+debug_object_fixup(bool (*fixup)(void *addr, enum debug_obj_state state),
void * addr, enum debug_obj_state state)
{
- int fixed = 0;
-
- if (fixup)
- fixed = fixup(addr, state);
- debug_objects_fixups += fixed;
- return fixed;
+ if (fixup && fixup(addr, state)) {
+ debug_objects_fixups++;
+ return true;
+ }
+ return false;
}
static void debug_object_is_on_stack(void *addr, int onstack)
@@ -416,7 +415,7 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
ret = debug_object_fixup(descr->fixup_activate, addr, state);
- return ret ? -EINVAL : 0;
+ return ret ? 0 : -EINVAL;
case ODEBUG_STATE_DESTROYED:
debug_print_object(obj, "activate");
@@ -432,14 +431,21 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr)
raw_spin_unlock_irqrestore(&db->lock, flags);
/*
- * This happens when a static object is activated. We
- * let the type specific code decide whether this is
- * true or not.
+ * We are here when a static object is activated. We
+ * let the type specific code confirm whether this is
+ * true or not. if true, we just make sure that the
+ * static object is tracked in the object tracker. If
+ * not, this must be a bug, so we try to fix it up.
*/
- if (debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE)) {
+ if (descr->is_static_object && descr->is_static_object(addr)) {
+ /* track this static object */
+ debug_object_init(addr, descr);
+ debug_object_activate(addr, descr);
+ } else {
debug_print_object(&o, "activate");
- return -EINVAL;
+ ret = debug_object_fixup(descr->fixup_activate, addr,
+ ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
return 0;
}
@@ -603,12 +609,18 @@ void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
raw_spin_unlock_irqrestore(&db->lock, flags);
/*
- * Maybe the object is static. Let the type specific
- * code decide what to do.
+ * Maybe the object is static, and we let the type specific
+ * code confirm. Track this static object if true, else invoke
+ * fixup.
*/
- if (debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE))
+ if (descr->is_static_object && descr->is_static_object(addr)) {
+ /* Track this static object */
+ debug_object_init(addr, descr);
+ } else {
debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr,
+ ODEBUG_STATE_NOTAVAILABLE);
+ }
return;
}
@@ -793,11 +805,18 @@ struct self_test {
static __initdata struct debug_obj_descr descr_type_test;
+static bool __init is_static_object(void *addr)
+{
+ struct self_test *obj = addr;
+
+ return obj->static_init;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
*/
-static int __init fixup_init(void *addr, enum debug_obj_state state)
+static bool __init fixup_init(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
@@ -805,37 +824,31 @@ static int __init fixup_init(void *addr, enum debug_obj_state state)
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_init(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
/*
* fixup_activate is called when:
* - an active object is activated
- * - an unknown object is activated (might be a statically initialized object)
+ * - an unknown non-static object is activated
*/
-static int __init fixup_activate(void *addr, enum debug_obj_state state)
+static bool __init fixup_activate(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
switch (state) {
case ODEBUG_STATE_NOTAVAILABLE:
- if (obj->static_init == 1) {
- debug_object_init(obj, &descr_type_test);
- debug_object_activate(obj, &descr_type_test);
- return 0;
- }
- return 1;
-
+ return true;
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_activate(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
@@ -843,7 +856,7 @@ static int __init fixup_activate(void *addr, enum debug_obj_state state)
* fixup_destroy is called when:
* - an active object is destroyed
*/
-static int __init fixup_destroy(void *addr, enum debug_obj_state state)
+static bool __init fixup_destroy(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
@@ -851,9 +864,9 @@ static int __init fixup_destroy(void *addr, enum debug_obj_state state)
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_destroy(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
@@ -861,7 +874,7 @@ static int __init fixup_destroy(void *addr, enum debug_obj_state state)
* fixup_free is called when:
* - an active object is freed
*/
-static int __init fixup_free(void *addr, enum debug_obj_state state)
+static bool __init fixup_free(void *addr, enum debug_obj_state state)
{
struct self_test *obj = addr;
@@ -869,9 +882,9 @@ static int __init fixup_free(void *addr, enum debug_obj_state state)
case ODEBUG_STATE_ACTIVE:
debug_object_deactivate(obj, &descr_type_test);
debug_object_free(obj, &descr_type_test);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
@@ -917,6 +930,7 @@ out:
static __initdata struct debug_obj_descr descr_type_test = {
.name = "selftest",
+ .is_static_object = is_static_object,
.fixup_init = fixup_init,
.fixup_activate = fixup_activate,
.fixup_destroy = fixup_destroy,
diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1ef4e2..55b8b2f41a9e 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,21 +104,25 @@ static int digsig_verify_rsa(struct key *key,
datap = pkh->mpi;
endp = ukp->data + ukp->datalen;
- err = -ENOMEM;
-
for (i = 0; i < pkh->nmpi; i++) {
unsigned int remaining = endp - datap;
pkey[i] = mpi_read_from_buffer(datap, &remaining);
- if (!pkey[i])
+ if (IS_ERR(pkey[i])) {
+ err = PTR_ERR(pkey[i]);
goto err;
+ }
datap += remaining;
}
mblen = mpi_get_nbits(pkey[0]);
mlen = DIV_ROUND_UP(mblen, 8);
- if (mlen == 0)
+ if (mlen == 0) {
+ err = -EINVAL;
goto err;
+ }
+
+ err = -ENOMEM;
out1 = kzalloc(mlen, GFP_KERNEL);
if (!out1)
@@ -126,8 +130,10 @@ static int digsig_verify_rsa(struct key *key,
nret = siglen;
in = mpi_read_from_buffer(sig, &nret);
- if (!in)
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
goto err;
+ }
res = mpi_alloc(mpi_get_nlimbs(in) * 2);
if (!res)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 4a1515f4b452..8971370bfb16 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <linux/dma-debug.h>
#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/export.h>
@@ -43,6 +44,7 @@ enum {
dma_debug_page,
dma_debug_sg,
dma_debug_coherent,
+ dma_debug_resource,
};
enum map_err_types {
@@ -150,8 +152,9 @@ static const char *const maperr2str[] = {
[MAP_ERR_CHECKED] = "dma map error checked",
};
-static const char *type2name[4] = { "single", "page",
- "scather-gather", "coherent" };
+static const char *type2name[5] = { "single", "page",
+ "scather-gather", "coherent",
+ "resource" };
static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
"DMA_FROM_DEVICE", "DMA_NONE" };
@@ -253,6 +256,7 @@ static int hash_fn(struct dma_debug_entry *entry)
*/
static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
unsigned long *flags)
+ __acquires(&dma_entry_hash[idx].lock)
{
int idx = hash_fn(entry);
unsigned long __flags;
@@ -267,6 +271,7 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
*/
static void put_hash_bucket(struct hash_bucket *bucket,
unsigned long *flags)
+ __releases(&bucket->lock)
{
unsigned long __flags = *flags;
@@ -397,6 +402,9 @@ static void hash_bucket_del(struct dma_debug_entry *entry)
static unsigned long long phys_addr(struct dma_debug_entry *entry)
{
+ if (entry->type == dma_debug_resource)
+ return __pfn_to_phys(entry->pfn) + entry->offset;
+
return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
}
@@ -657,9 +665,9 @@ static struct dma_debug_entry *dma_entry_alloc(void)
spin_lock_irqsave(&free_entries_lock, flags);
if (list_empty(&free_entries)) {
- pr_err("DMA-API: debugging out of memory - disabling\n");
global_disable = true;
spin_unlock_irqrestore(&free_entries_lock, flags);
+ pr_err("DMA-API: debugging out of memory - disabling\n");
return NULL;
}
@@ -1162,11 +1170,32 @@ static void check_unmap(struct dma_debug_entry *ref)
put_hash_bucket(bucket, &flags);
}
-static void check_for_stack(struct device *dev, void *addr)
+static void check_for_stack(struct device *dev,
+ struct page *page, size_t offset)
{
- if (object_is_on_stack(addr))
- err_printk(dev, NULL, "DMA-API: device driver maps memory from "
- "stack [addr=%p]\n", addr);
+ void *addr;
+ struct vm_struct *stack_vm_area = task_stack_vm_area(current);
+
+ if (!stack_vm_area) {
+ /* Stack is direct-mapped. */
+ if (PageHighMem(page))
+ return;
+ addr = page_address(page) + offset;
+ if (object_is_on_stack(addr))
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+ } else {
+ /* Stack is vmalloced. */
+ int i;
+
+ for (i = 0; i < stack_vm_area->nr_pages; i++) {
+ if (page != stack_vm_area->pages[i])
+ continue;
+
+ addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+ break;
+ }
+ }
}
static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1289,10 +1318,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
if (map_single)
entry->type = dma_debug_single;
+ check_for_stack(dev, page, offset);
+
if (!PageHighMem(page)) {
void *addr = page_address(page) + offset;
- check_for_stack(dev, addr);
check_for_illegal_area(dev, addr, size);
}
@@ -1384,8 +1414,9 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
entry->sg_call_ents = nents;
entry->sg_mapped_ents = mapped_ents;
+ check_for_stack(dev, sg_page(s), s->offset);
+
if (!PageHighMem(sg_page(s))) {
- check_for_stack(dev, sg_virt(s));
check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
}
@@ -1493,6 +1524,49 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
}
EXPORT_SYMBOL(debug_dma_free_coherent);
+void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
+ int direction, dma_addr_t dma_addr)
+{
+ struct dma_debug_entry *entry;
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_resource;
+ entry->dev = dev;
+ entry->pfn = PHYS_PFN(addr);
+ entry->offset = offset_in_page(addr);
+ entry->size = size;
+ entry->dev_addr = dma_addr;
+ entry->direction = direction;
+ entry->map_err_type = MAP_ERR_NOT_CHECKED;
+
+ add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_map_resource);
+
+void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
+ size_t size, int direction)
+{
+ struct dma_debug_entry ref = {
+ .type = dma_debug_resource,
+ .dev = dev,
+ .dev_addr = dma_addr,
+ .size = size,
+ .direction = direction,
+ };
+
+ if (unlikely(dma_debug_disabled()))
+ return;
+
+ check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_unmap_resource);
+
void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
{
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
index 72145646857e..3d766e78fbe2 100644
--- a/lib/dma-noop.c
+++ b/lib/dma-noop.c
@@ -10,7 +10,7 @@
static void *dma_noop_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *ret;
@@ -22,7 +22,7 @@ static void *dma_noop_alloc(struct device *dev, size_t size,
static void dma_noop_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
free_pages((unsigned long)cpu_addr, get_order(size));
}
@@ -30,13 +30,14 @@ static void dma_noop_free(struct device *dev, size_t size,
static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return page_to_phys(page) + offset;
}
static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir,
+ unsigned long attrs)
{
int i;
struct scatterlist *sg;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index fe42b6ec3f0c..da796e2dc4f5 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -188,6 +188,13 @@ static int ddebug_change(const struct ddebug_query *query,
newflags = (dp->flags & mask) | flags;
if (newflags == dp->flags)
continue;
+#ifdef HAVE_JUMP_LABEL
+ if (dp->flags & _DPRINTK_FLAGS_PRINT) {
+ if (!(flags & _DPRINTK_FLAGS_PRINT))
+ static_branch_disable(&dp->key.dd_key_true);
+ } else if (flags & _DPRINTK_FLAGS_PRINT)
+ static_branch_enable(&dp->key.dd_key_true);
+#endif
dp->flags = newflags;
vpr_info("changed %s:%d [%s]%s =%s\n",
trim_prefix(dp->filename), dp->lineno,
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 3eb3e4722b8e..db283ba4d2c1 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -125,7 +125,10 @@ struct cpio_data find_cpio_data(const char *path, void *data,
if ((ch[C_MODE] & 0170000) == 0100000 &&
ch[C_NAMESIZE] >= mypathsize &&
!memcmp(p, path, mypathsize)) {
- *nextoff = (long)nptr - (long)data;
+
+ if (nextoff)
+ *nextoff = (long)nptr - (long)data;
+
if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
pr_warn(
"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
diff --git a/lib/gcd.c b/lib/gcd.c
index 3657f129d7b8..135ee6407a5e 100644
--- a/lib/gcd.c
+++ b/lib/gcd.c
@@ -2,20 +2,77 @@
#include <linux/gcd.h>
#include <linux/export.h>
-/* Greatest common divisor */
+/*
+ * This implements the binary GCD algorithm. (Often attributed to Stein,
+ * but as Knuth has noted, appears in a first-century Chinese math text.)
+ *
+ * This is faster than the division-based algorithm even on x86, which
+ * has decent hardware division.
+ */
+
+#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) && !defined(CPU_NO_EFFICIENT_FFS)
+
+/* If __ffs is available, the even/odd algorithm benchmarks slower. */
unsigned long gcd(unsigned long a, unsigned long b)
{
- unsigned long r;
+ unsigned long r = a | b;
+
+ if (!a || !b)
+ return r;
- if (a < b)
- swap(a, b);
+ b >>= __ffs(b);
+ if (b == 1)
+ return r & -r;
- if (!b)
- return a;
- while ((r = a % b) != 0) {
- a = b;
- b = r;
+ for (;;) {
+ a >>= __ffs(a);
+ if (a == 1)
+ return r & -r;
+ if (a == b)
+ return a << __ffs(r);
+
+ if (a < b)
+ swap(a, b);
+ a -= b;
}
- return b;
}
+
+#else
+
+/* If normalization is done by loops, the even/odd algorithm is a win. */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+ unsigned long r = a | b;
+
+ if (!a || !b)
+ return r;
+
+ /* Isolate lsbit of r */
+ r &= -r;
+
+ while (!(b & r))
+ b >>= 1;
+ if (b == r)
+ return r;
+
+ for (;;) {
+ while (!(a & r))
+ a >>= 1;
+ if (a == r)
+ return r;
+ if (a == b)
+ return a;
+
+ if (a < b)
+ swap(a, b);
+ a -= b;
+ a >>= 1;
+ if (a & r)
+ a += b;
+ a >>= 1;
+ }
+}
+
+#endif
+
EXPORT_SYMBOL_GPL(gcd);
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221558..43273a7d83cf 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
+#endif
unsigned int __sw_hweight16(unsigned int w)
{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
}
EXPORT_SYMBOL(__sw_hweight8);
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
+#endif
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c27e269210c4..a816f3a80625 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -29,8 +29,7 @@ again:
index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
if (index < size) {
if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
- /* we could do more effectively */
- start = index + 1;
+ start = ALIGN(shift + index, boundary_size) - shift;
goto again;
}
bitmap_set(map, index, nr);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ca5316e0087b..7e3138cfc8c9 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -56,37 +56,24 @@
n = wanted; \
}
-#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->bvec; \
- __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
- if (likely(__v.bv_len)) { \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset + skip; \
- (void)(STEP); \
- skip += __v.bv_len; \
- n -= __v.bv_len; \
- } \
- while (unlikely(n)) { \
- __p++; \
- __v.bv_len = min_t(size_t, n, __p->bv_len); \
- if (unlikely(!__v.bv_len)) \
+#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
+ struct bvec_iter __start; \
+ __start.bi_size = n; \
+ __start.bi_bvec_done = skip; \
+ __start.bi_idx = 0; \
+ for_each_bvec(__v, i->bvec, __bi, __start) { \
+ if (!__v.bv_len) \
continue; \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset; \
(void)(STEP); \
- skip = __v.bv_len; \
- n -= __v.bv_len; \
} \
- n = wanted; \
}
#define iterate_all_kinds(i, n, v, I, B, K) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
+ struct bvec_iter __bi; \
+ iterate_bvec(i, n, v, __bi, skip, (B)) \
} else if (unlikely(i->type & ITER_KVEC)) { \
const struct kvec *kvec; \
struct kvec v; \
@@ -99,40 +86,42 @@
}
#define iterate_and_advance(i, n, v, I, B, K) { \
- size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
- struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
- if (skip == bvec->bv_len) { \
- bvec++; \
- skip = 0; \
- } \
- i->nr_segs -= bvec - i->bvec; \
- i->bvec = bvec; \
- } else if (unlikely(i->type & ITER_KVEC)) { \
- const struct kvec *kvec; \
- struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
- if (skip == kvec->iov_len) { \
- kvec++; \
- skip = 0; \
- } \
- i->nr_segs -= kvec - i->kvec; \
- i->kvec = kvec; \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- if (skip == iov->iov_len) { \
- iov++; \
- skip = 0; \
+ if (unlikely(i->count < n)) \
+ n = i->count; \
+ if (i->count) { \
+ size_t skip = i->iov_offset; \
+ if (unlikely(i->type & ITER_BVEC)) { \
+ const struct bio_vec *bvec = i->bvec; \
+ struct bio_vec v; \
+ struct bvec_iter __bi; \
+ iterate_bvec(i, n, v, __bi, skip, (B)) \
+ i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
+ i->nr_segs -= i->bvec - bvec; \
+ skip = __bi.bi_bvec_done; \
+ } else if (unlikely(i->type & ITER_KVEC)) { \
+ const struct kvec *kvec; \
+ struct kvec v; \
+ iterate_kvec(i, n, v, kvec, skip, (K)) \
+ if (skip == kvec->iov_len) { \
+ kvec++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= kvec - i->kvec; \
+ i->kvec = kvec; \
+ } else { \
+ const struct iovec *iov; \
+ struct iovec v; \
+ iterate_iovec(i, n, v, iov, skip, (I)) \
+ if (skip == iov->iov_len) { \
+ iov++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= iov - i->iov; \
+ i->iov = iov; \
} \
- i->nr_segs -= iov - i->iov; \
- i->iov = iov; \
+ i->count -= n; \
+ i->iov_offset = skip; \
} \
- i->count -= n; \
- i->iov_offset = skip; \
}
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
@@ -155,7 +144,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
- if (!fault_in_pages_writeable(buf, copy)) {
+ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
kaddr = kmap_atomic(page);
from = kaddr + offset;
@@ -186,6 +175,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
copy = min(bytes, iov->iov_len - skip);
}
/* Too bad - revert to non-atomic kmap */
+
kaddr = kmap(page);
from = kaddr + offset;
left = __copy_to_user(buf, from, copy);
@@ -204,6 +194,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
bytes -= copy;
}
kunmap(page);
+
done:
if (skip == iov->iov_len) {
iov++;
@@ -236,7 +227,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
- if (!fault_in_pages_readable(buf, copy)) {
+ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
kaddr = kmap_atomic(page);
to = kaddr + offset;
@@ -267,6 +258,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
copy = min(bytes, iov->iov_len - skip);
}
/* Too bad - revert to non-atomic kmap */
+
kaddr = kmap(page);
to = kaddr + offset;
left = __copy_from_user(to, buf, copy);
@@ -285,6 +277,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
bytes -= copy;
}
kunmap(page);
+
done:
if (skip == iov->iov_len) {
iov++;
@@ -298,33 +291,13 @@ done:
}
/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
- if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
- char __user *buf = i->iov->iov_base + i->iov_offset;
- bytes = min(bytes, i->iov->iov_len - i->iov_offset);
- return fault_in_pages_readable(buf, bytes);
- }
- return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-/*
* Fault in one or more iovecs of the given iov_iter, to a maximum length of
* bytes. For each iovec, fault in each page that constitutes the iovec.
*
* Return 0 on success, or non-zero if the memory could not be accessed (i.e.
* because it is an invalid address).
*/
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
size_t skip = i->iov_offset;
const struct iovec *iov;
@@ -341,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
}
return 0;
}
-EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
void iov_iter_init(struct iov_iter *i, int direction,
const struct iovec *iov, unsigned long nr_segs,
@@ -386,12 +359,6 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
@@ -407,12 +374,6 @@ EXPORT_SYMBOL(copy_to_iter);
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
v.iov_len),
@@ -428,12 +389,6 @@ EXPORT_SYMBOL(copy_from_iter);
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len),
@@ -474,12 +429,6 @@ EXPORT_SYMBOL(copy_page_from_iter);
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
iterate_and_advance(i, bytes, v,
__clear_user(v.iov_base, v.iov_len),
memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -685,12 +634,6 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
char *to = addr;
__wsum sum, next;
size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
sum = *csum;
iterate_and_advance(i, bytes, v, ({
int err = 0;
@@ -729,12 +672,6 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
const char *from = addr;
__wsum sum, next;
size_t off = 0;
- if (unlikely(bytes > i->count))
- bytes = i->count;
-
- if (unlikely(!bytes))
- return 0;
-
sum = *csum;
iterate_and_advance(i, bytes, v, ({
int err = 0;
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 836f7db4e548..2be55692aa43 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -184,30 +184,21 @@ void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
}
EXPORT_SYMBOL(irq_poll_init);
-static int irq_poll_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int irq_poll_cpu_dead(unsigned int cpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
-
- local_irq_disable();
- list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
- this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
- local_irq_enable();
- }
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+ this_cpu_ptr(&blk_cpu_iopoll));
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+ local_irq_enable();
- return NOTIFY_OK;
+ return 0;
}
-static struct notifier_block irq_poll_cpu_notifier = {
- .notifier_call = irq_poll_cpu_notify,
-};
-
static __init int irq_poll_setup(void)
{
int i;
@@ -216,7 +207,8 @@ static __init int irq_poll_setup(void)
INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
- register_hotcpu_notifier(&irq_poll_cpu_notifier);
+ cpuhp_setup_state_nocalls(CPUHP_IRQ_POLL_DEAD, "irq_poll:dead", NULL,
+ irq_poll_cpu_dead);
return 0;
}
subsys_initcall(irq_poll_setup);
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f9e4a3..5a0f75a3bf01 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/count_zeros.h>
#include <linux/byteorder/generic.h>
+#include <linux/scatterlist.h>
#include <linux/string.h>
#include "mpi-internal.h"
@@ -50,9 +51,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
return NULL;
}
if (nbytes > 0)
- nbits -= count_leading_zeros(buffer[0]);
- else
- nbits = 0;
+ nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8);
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
@@ -82,50 +81,30 @@ EXPORT_SYMBOL_GPL(mpi_read_raw_data);
MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
{
const uint8_t *buffer = xbuffer;
- int i, j;
- unsigned nbits, nbytes, nlimbs, nread = 0;
- mpi_limb_t a;
- MPI val = NULL;
+ unsigned int nbits, nbytes;
+ MPI val;
if (*ret_nread < 2)
- goto leave;
+ return ERR_PTR(-EINVAL);
nbits = buffer[0] << 8 | buffer[1];
if (nbits > MAX_EXTERN_MPI_BITS) {
pr_info("MPI: mpi too large (%u bits)\n", nbits);
- goto leave;
+ return ERR_PTR(-EINVAL);
}
- buffer += 2;
- nread = 2;
nbytes = DIV_ROUND_UP(nbits, 8);
- nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
- val = mpi_alloc(nlimbs);
- if (!val)
- return NULL;
- i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
- i %= BYTES_PER_MPI_LIMB;
- val->nbits = nbits;
- j = val->nlimbs = nlimbs;
- val->sign = 0;
- for (; j > 0; j--) {
- a = 0;
- for (; i < BYTES_PER_MPI_LIMB; i++) {
- if (++nread > *ret_nread) {
- printk
- ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
- nread, *ret_nread);
- goto leave;
- }
- a <<= 8;
- a |= *buffer++;
- }
- i = 0;
- val->d[j - 1] = a;
+ if (nbytes + 2 > *ret_nread) {
+ pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n",
+ nbytes, *ret_nread);
+ return ERR_PTR(-EINVAL);
}
-leave:
- *ret_nread = nread;
+ val = mpi_read_raw_data(buffer + 2, nbytes);
+ if (!val)
+ return ERR_PTR(-ENOMEM);
+
+ *ret_nread = nbytes + 2;
return val;
}
EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
@@ -250,82 +229,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
}
EXPORT_SYMBOL_GPL(mpi_get_buffer);
-/****************
- * Use BUFFER to update MPI.
- */
-int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
-{
- const uint8_t *buffer = xbuffer, *p;
- mpi_limb_t alimb;
- int nlimbs;
- int i;
-
- nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
- if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
- return -ENOMEM;
- a->sign = sign;
-
- for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
-#if BYTES_PER_MPI_LIMB == 4
- alimb = (mpi_limb_t) *p--;
- alimb |= (mpi_limb_t) *p-- << 8;
- alimb |= (mpi_limb_t) *p-- << 16;
- alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
- alimb = (mpi_limb_t) *p--;
- alimb |= (mpi_limb_t) *p-- << 8;
- alimb |= (mpi_limb_t) *p-- << 16;
- alimb |= (mpi_limb_t) *p-- << 24;
- alimb |= (mpi_limb_t) *p-- << 32;
- alimb |= (mpi_limb_t) *p-- << 40;
- alimb |= (mpi_limb_t) *p-- << 48;
- alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
- a->d[i++] = alimb;
- }
- if (p >= buffer) {
-#if BYTES_PER_MPI_LIMB == 4
- alimb = *p--;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 8;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 16;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
- alimb = (mpi_limb_t) *p--;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 8;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 16;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 24;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 32;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 40;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 48;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
- a->d[i++] = alimb;
- }
- a->nlimbs = i;
-
- if (i != nlimbs) {
- pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
- nlimbs);
- BUG();
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(mpi_set_buffer);
-
/**
* mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first)
*
@@ -335,16 +238,13 @@ EXPORT_SYMBOL_GPL(mpi_set_buffer);
* @a: a multi precision integer
* @sgl: scatterlist to write to. Needs to be at least
* mpi_get_size(a) long.
- * @nbytes: in/out param - it has the be set to the maximum number of
- * bytes that can be written to sgl. This has to be at least
- * the size of the integer a. On return it receives the actual
- * length of the data written on success or the data that would
- * be written if buffer was too small.
+ * @nbytes: the number of bytes to write. Leading bytes will be
+ * filled with zero.
* @sign: if not NULL, it will be set to the sign of a.
*
* Return: 0 on success or error code in case of error
*/
-int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
+int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes,
int *sign)
{
u8 *p, *p2;
@@ -356,55 +256,60 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
#error please implement for this limb size.
#endif
unsigned int n = mpi_get_size(a);
- int i, x, y = 0, lzeros, buf_len;
-
- if (!nbytes)
- return -EINVAL;
+ struct sg_mapping_iter miter;
+ int i, x, buf_len;
+ int nents;
if (sign)
*sign = a->sign;
- lzeros = count_lzeros(a);
-
- if (*nbytes < n - lzeros) {
- *nbytes = n - lzeros;
+ if (nbytes < n)
return -EOVERFLOW;
- }
- *nbytes = n - lzeros;
- buf_len = sgl->length;
- p2 = sg_virt(sgl);
+ nents = sg_nents_for_len(sgl, nbytes);
+ if (nents < 0)
+ return -EINVAL;
- for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
- lzeros %= BYTES_PER_MPI_LIMB;
- i >= 0; i--) {
+ sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+
+ while (nbytes > n) {
+ i = min_t(unsigned, nbytes - n, buf_len);
+ memset(p2, 0, i);
+ p2 += i;
+ nbytes -= i;
+
+ buf_len -= i;
+ if (!buf_len) {
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+ }
+ }
+
+ for (i = a->nlimbs - 1; i >= 0; i--) {
#if BYTES_PER_MPI_LIMB == 4
- alimb = cpu_to_be32(a->d[i]);
+ alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0;
#elif BYTES_PER_MPI_LIMB == 8
- alimb = cpu_to_be64(a->d[i]);
+ alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0;
#else
#error please implement for this limb size.
#endif
- if (lzeros) {
- y = lzeros;
- lzeros = 0;
- }
-
- p = (u8 *)&alimb + y;
+ p = (u8 *)&alimb;
- for (x = 0; x < sizeof(alimb) - y; x++) {
- if (!buf_len) {
- sgl = sg_next(sgl);
- if (!sgl)
- return -EINVAL;
- buf_len = sgl->length;
- p2 = sg_virt(sgl);
- }
+ for (x = 0; x < sizeof(alimb); x++) {
*p2++ = *p++;
- buf_len--;
+ if (!--buf_len) {
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+ }
}
- y = 0;
}
+
+ sg_miter_stop(&miter);
return 0;
}
EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
@@ -424,19 +329,23 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
*/
MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
{
- struct scatterlist *sg;
- int x, i, j, z, lzeros, ents;
+ struct sg_mapping_iter miter;
unsigned int nbits, nlimbs;
+ int x, j, z, lzeros, ents;
+ unsigned int len;
+ const u8 *buff;
mpi_limb_t a;
MPI val = NULL;
- lzeros = 0;
- ents = sg_nents(sgl);
+ ents = sg_nents_for_len(sgl, nbytes);
+ if (ents < 0)
+ return NULL;
- for_each_sg(sgl, sg, ents, i) {
- const u8 *buff = sg_virt(sg);
- int len = sg->length;
+ sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
+ lzeros = 0;
+ len = 0;
+ while (nbytes > 0) {
while (len && !*buff) {
lzeros++;
len--;
@@ -446,12 +355,17 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
if (len && *buff)
break;
- ents--;
+ sg_miter_next(&miter);
+ buff = miter.addr;
+ len = miter.length;
+
nbytes -= lzeros;
lzeros = 0;
}
- sgl = sg;
+ miter.consumed = lzeros;
+ sg_miter_stop(&miter);
+
nbytes -= lzeros;
nbits = nbytes * 8;
if (nbits > MAX_EXTERN_MPI_BITS) {
@@ -460,8 +374,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
}
if (nbytes > 0)
- nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) -
- (BITS_PER_LONG - 8);
+ nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
@@ -480,21 +393,21 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
z %= BYTES_PER_MPI_LIMB;
- for_each_sg(sgl, sg, ents, i) {
- const u8 *buffer = sg_virt(sg) + lzeros;
- int len = sg->length - lzeros;
+ while (sg_miter_next(&miter)) {
+ buff = miter.addr;
+ len = miter.length;
for (x = 0; x < len; x++) {
a <<= 8;
- a |= *buffer++;
+ a |= *buff++;
if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
val->d[j--] = a;
a = 0;
}
}
z += x;
- lzeros = 0;
}
+
return val;
}
EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 6019c53c669e..26caf51cc238 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -16,33 +16,14 @@
#include <linux/delay.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
-#include <linux/seq_buf.h>
#ifdef arch_trigger_all_cpu_backtrace
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-static cpumask_t printtrace_mask;
-
-#define NMI_BUF_SIZE 4096
-
-struct nmi_seq_buf {
- unsigned char buffer[NMI_BUF_SIZE];
- struct seq_buf seq;
-};
-
-/* Safe printing in NMI context */
-static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;
-static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
-{
- const char *buf = s->buffer + start;
-
- printk("%.*s", (end - start) + 1, buf);
-}
-
/*
* When raise() is called it will be is passed a pointer to the
* backtrace_mask. Architectures that call nmi_cpu_backtrace()
@@ -52,8 +33,7 @@ static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
void nmi_trigger_all_cpu_backtrace(bool include_self,
void (*raise)(cpumask_t *mask))
{
- struct nmi_seq_buf *s;
- int i, cpu, this_cpu = get_cpu();
+ int i, this_cpu = get_cpu();
if (test_and_set_bit(0, &backtrace_flag)) {
/*
@@ -68,17 +48,6 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
if (!include_self)
cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
- cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask));
-
- /*
- * Set up per_cpu seq_buf buffers that the NMIs running on the other
- * CPUs will write to.
- */
- for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
- s = &per_cpu(nmi_print_seq, cpu);
- seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
- }
-
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
pr_info("Sending NMI to %s CPUs:\n",
(include_self ? "all" : "other"));
@@ -94,73 +63,25 @@ void nmi_trigger_all_cpu_backtrace(bool include_self,
}
/*
- * Now that all the NMIs have triggered, we can dump out their
- * back traces safely to the console.
+ * Force flush any remote buffers that might be stuck in IRQ context
+ * and therefore could not run their irq_work.
*/
- for_each_cpu(cpu, &printtrace_mask) {
- int len, last_i = 0;
+ printk_nmi_flush();
- s = &per_cpu(nmi_print_seq, cpu);
- len = seq_buf_used(&s->seq);
- if (!len)
- continue;
-
- /* Print line by line. */
- for (i = 0; i < len; i++) {
- if (s->buffer[i] == '\n') {
- print_seq_line(s, last_i, i);
- last_i = i + 1;
- }
- }
- /* Check if there was a partial line. */
- if (last_i < len) {
- print_seq_line(s, last_i, len - 1);
- pr_cont("\n");
- }
- }
-
- clear_bit(0, &backtrace_flag);
- smp_mb__after_atomic();
+ clear_bit_unlock(0, &backtrace_flag);
put_cpu();
}
-/*
- * It is not safe to call printk() directly from NMI handlers.
- * It may be fine if the NMI detected a lock up and we have no choice
- * but to do so, but doing a NMI on all other CPUs to get a back trace
- * can be done with a sysrq-l. We don't want that to lock up, which
- * can happen if the NMI interrupts a printk in progress.
- *
- * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
- * the content into a per cpu seq_buf buffer. Then when the NMIs are
- * all done, we can safely dump the contents of the seq_buf to a printk()
- * from a non NMI context.
- */
-static int nmi_vprintk(const char *fmt, va_list args)
-{
- struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
- unsigned int len = seq_buf_used(&s->seq);
-
- seq_buf_vprintf(&s->seq, fmt, args);
- return seq_buf_used(&s->seq) - len;
-}
-
bool nmi_cpu_backtrace(struct pt_regs *regs)
{
int cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- printk_func_t printk_func_save = this_cpu_read(printk_func);
-
- /* Replace printk to write into the NMI seq */
- this_cpu_write(printk_func, nmi_vprintk);
pr_warn("NMI backtrace for cpu %d\n", cpu);
if (regs)
show_regs(regs);
else
dump_stack();
- this_cpu_write(printk_func, printk_func_save);
-
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return true;
}
diff --git a/lib/nodemask.c b/lib/nodemask.c
new file mode 100644
index 000000000000..e42a5bf44d33
--- /dev/null
+++ b/lib/nodemask.c
@@ -0,0 +1,30 @@
+#include <linux/nodemask.h>
+#include <linux/module.h>
+#include <linux/random.h>
+
+int __next_node_in(int node, const nodemask_t *srcp)
+{
+ int ret = __next_node(node, srcp);
+
+ if (ret == MAX_NUMNODES)
+ ret = __first_node(srcp);
+ return ret;
+}
+EXPORT_SYMBOL(__next_node_in);
+
+#ifdef CONFIG_NUMA
+/*
+ * Return the bit number of a random bit set in the nodemask.
+ * (returns NUMA_NO_NODE if nodemask is empty)
+ */
+int node_random(const nodemask_t *maskp)
+{
+ int w, bit = NUMA_NO_NODE;
+
+ w = nodes_weight(*maskp);
+ if (w)
+ bit = bitmap_ord_to_pos(maskp->bits,
+ get_random_int() % w, MAX_NUMNODES);
+ return bit;
+}
+#endif
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index f051d69f0910..72d36113ccaa 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -19,7 +19,7 @@ static DEFINE_SPINLOCK(percpu_counters_lock);
static struct debug_obj_descr percpu_counter_debug_descr;
-static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
+static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
struct percpu_counter *fbc = addr;
@@ -27,9 +27,9 @@ static int percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
case ODEBUG_STATE_ACTIVE:
percpu_counter_destroy(fbc);
debug_object_free(fbc, &percpu_counter_debug_descr);
- return 1;
+ return true;
default:
- return 0;
+ return false;
}
}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1624c4117961..8e6d552c40dd 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -4,6 +4,8 @@
* Copyright (C) 2005 SGI, Christoph Lameter
* Copyright (C) 2006 Nick Piggin
* Copyright (C) 2012 Konstantin Khlebnikov
+ * Copyright (C) 2016 Intel, Matthew Wilcox
+ * Copyright (C) 2016 Intel, Ross Zwisler
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -36,11 +38,8 @@
#include <linux/preempt.h> /* in_interrupt() */
-/*
- * The height_to_maxindex array needs to be one deeper than the maximum
- * path as height 0 holds only 1 entry.
- */
-static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
+/* Number of nodes in fully populated tree of given height */
+static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
/*
* Radix tree node cache.
@@ -64,20 +63,58 @@ static struct kmem_cache *radix_tree_node_cachep;
* Per-cpu pool of preloaded nodes
*/
struct radix_tree_preload {
- int nr;
+ unsigned nr;
/* nodes->private_data points to next preallocated node */
struct radix_tree_node *nodes;
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
-static inline void *ptr_to_indirect(void *ptr)
+static inline void *node_to_entry(void *ptr)
{
- return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+ return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}
-static inline void *indirect_to_ptr(void *ptr)
+#define RADIX_TREE_RETRY node_to_entry(NULL)
+
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/* Sibling slots point directly to another slot in the same node */
+static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
{
- return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
+ void **ptr = node;
+ return (parent->slots <= ptr) &&
+ (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
+}
+#else
+static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node)
+{
+ return false;
+}
+#endif
+
+static inline unsigned long get_slot_offset(struct radix_tree_node *parent,
+ void **slot)
+{
+ return slot - parent->slots;
+}
+
+static unsigned int radix_tree_descend(struct radix_tree_node *parent,
+ struct radix_tree_node **nodep, unsigned long index)
+{
+ unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
+ void **entry = rcu_dereference_raw(parent->slots[offset]);
+
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+ if (radix_tree_is_internal_node(entry)) {
+ if (is_sibling_entry(parent, entry)) {
+ void **sibentry = (void **) entry_to_node(entry);
+ offset = get_slot_offset(parent, sibentry);
+ entry = rcu_dereference_raw(*sibentry);
+ }
+ }
+#endif
+
+ *nodep = (void *)entry;
+ return offset;
}
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
@@ -108,7 +145,7 @@ static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}
-static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
+static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}
@@ -120,7 +157,12 @@ static inline void root_tag_clear_all(struct radix_tree_root *root)
static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
- return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+ return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+}
+
+static inline unsigned root_tags_get(struct radix_tree_root *root)
+{
+ return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT;
}
/*
@@ -129,7 +171,7 @@ static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
*/
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
- int idx;
+ unsigned idx;
for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
if (node->tags[tag][idx])
return 1;
@@ -173,38 +215,45 @@ radix_tree_find_next_bit(const unsigned long *addr,
return size;
}
-#if 0
-static void dump_node(void *slot, int height, int offset)
+#ifndef __KERNEL__
+static void dump_node(struct radix_tree_node *node, unsigned long index)
{
- struct radix_tree_node *node;
- int i;
+ unsigned long i;
- if (!slot)
- return;
+ pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
+ node, node->offset,
+ node->tags[0][0], node->tags[1][0], node->tags[2][0],
+ node->shift, node->count, node->parent);
- if (height == 0) {
- pr_debug("radix entry %p offset %d\n", slot, offset);
- return;
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+ unsigned long first = index | (i << node->shift);
+ unsigned long last = first | ((1UL << node->shift) - 1);
+ void *entry = node->slots[i];
+ if (!entry)
+ continue;
+ if (is_sibling_entry(node, entry)) {
+ pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n",
+ entry, i,
+ *(void **)entry_to_node(entry),
+ first, last);
+ } else if (!radix_tree_is_internal_node(entry)) {
+ pr_debug("radix entry %p offset %ld indices %ld-%ld\n",
+ entry, i, first, last);
+ } else {
+ dump_node(entry_to_node(entry), first);
+ }
}
-
- node = indirect_to_ptr(slot);
- pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n",
- slot, offset, node->tags[0][0], node->tags[1][0],
- node->tags[2][0], node->path, node->count, node->parent);
-
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
- dump_node(node->slots[i], height - 1, i);
}
/* For debug */
static void radix_tree_dump(struct radix_tree_root *root)
{
- pr_debug("radix root: %p height %d rnode %p tags %x\n",
- root, root->height, root->rnode,
+ pr_debug("radix root: %p rnode %p tags %x\n",
+ root, root->rnode,
root->gfp_mask >> __GFP_BITS_SHIFT);
- if (!radix_tree_is_indirect_ptr(root->rnode))
+ if (!radix_tree_is_internal_node(root->rnode))
return;
- dump_node(root->rnode, root->height, 0);
+ dump_node(entry_to_node(root->rnode), 0);
}
#endif
@@ -219,19 +268,20 @@ radix_tree_node_alloc(struct radix_tree_root *root)
gfp_t gfp_mask = root_gfp_mask(root);
/*
- * Preload code isn't irq safe and it doesn't make sence to use
- * preloading in the interrupt anyway as all the allocations have to
- * be atomic. So just do normal allocation when in interrupt.
+ * Preload code isn't irq safe and it doesn't make sense to use
+ * preloading during an interrupt anyway as all the allocations have
+ * to be atomic. So just do normal allocation when in interrupt.
*/
if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
struct radix_tree_preload *rtp;
/*
* Even if the caller has preloaded, try to allocate from the
- * cache first for the new node to get accounted.
+ * cache first for the new node to get accounted to the memory
+ * cgroup.
*/
ret = kmem_cache_alloc(radix_tree_node_cachep,
- gfp_mask | __GFP_ACCOUNT | __GFP_NOWARN);
+ gfp_mask | __GFP_NOWARN);
if (ret)
goto out;
@@ -254,10 +304,9 @@ radix_tree_node_alloc(struct radix_tree_root *root)
kmemleak_update_trace(ret);
goto out;
}
- ret = kmem_cache_alloc(radix_tree_node_cachep,
- gfp_mask | __GFP_ACCOUNT);
+ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
- BUG_ON(radix_tree_is_indirect_ptr(ret));
+ BUG_ON(radix_tree_is_internal_node(ret));
return ret;
}
@@ -296,22 +345,28 @@ radix_tree_node_free(struct radix_tree_node *node)
* To make use of this facility, the radix tree must be initialised without
* __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
*/
-static int __radix_tree_preload(gfp_t gfp_mask)
+static int __radix_tree_preload(gfp_t gfp_mask, int nr)
{
struct radix_tree_preload *rtp;
struct radix_tree_node *node;
int ret = -ENOMEM;
+ /*
+ * Nodes preloaded by one cgroup can be be used by another cgroup, so
+ * they should never be accounted to any particular memory cgroup.
+ */
+ gfp_mask &= ~__GFP_ACCOUNT;
+
preempt_disable();
rtp = this_cpu_ptr(&radix_tree_preloads);
- while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+ while (rtp->nr < nr) {
preempt_enable();
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
if (node == NULL)
goto out;
preempt_disable();
rtp = this_cpu_ptr(&radix_tree_preloads);
- if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+ if (rtp->nr < nr) {
node->private_data = rtp->nodes;
rtp->nodes = node;
rtp->nr++;
@@ -337,7 +392,7 @@ int radix_tree_preload(gfp_t gfp_mask)
{
/* Warn on non-sensical use... */
WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
- return __radix_tree_preload(gfp_mask);
+ return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
@@ -349,7 +404,7 @@ EXPORT_SYMBOL(radix_tree_preload);
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
if (gfpflags_allow_blocking(gfp_mask))
- return __radix_tree_preload(gfp_mask);
+ return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
/* Preloading doesn't help anything with this gfp mask, skip it */
preempt_disable();
return 0;
@@ -357,38 +412,103 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
EXPORT_SYMBOL(radix_tree_maybe_preload);
/*
- * Return the maximum key which can be store into a
- * radix tree with height HEIGHT.
+ * The same as function above, but preload number of nodes required to insert
+ * (1 << order) continuous naturally-aligned elements.
*/
-static inline unsigned long radix_tree_maxindex(unsigned int height)
+int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
{
- return height_to_maxindex[height];
+ unsigned long nr_subtrees;
+ int nr_nodes, subtree_height;
+
+ /* Preloading doesn't help anything with this gfp mask, skip it */
+ if (!gfpflags_allow_blocking(gfp_mask)) {
+ preempt_disable();
+ return 0;
+ }
+
+ /*
+ * Calculate number and height of fully populated subtrees it takes to
+ * store (1 << order) elements.
+ */
+ nr_subtrees = 1 << order;
+ for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
+ subtree_height++)
+ nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
+
+ /*
+ * The worst case is zero height tree with a single item at index 0 and
+ * then inserting items starting at ULONG_MAX - (1 << order).
+ *
+ * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
+ * 0-index item.
+ */
+ nr_nodes = RADIX_TREE_MAX_PATH;
+
+ /* Plus branch to fully populated subtrees. */
+ nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
+
+ /* Root node is shared. */
+ nr_nodes--;
+
+ /* Plus nodes required to build subtrees. */
+ nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
+
+ return __radix_tree_preload(gfp_mask, nr_nodes);
+}
+
+/*
+ * The maximum index which can be stored in a radix tree
+ */
+static inline unsigned long shift_maxindex(unsigned int shift)
+{
+ return (RADIX_TREE_MAP_SIZE << shift) - 1;
+}
+
+static inline unsigned long node_maxindex(struct radix_tree_node *node)
+{
+ return shift_maxindex(node->shift);
+}
+
+static unsigned radix_tree_load_root(struct radix_tree_root *root,
+ struct radix_tree_node **nodep, unsigned long *maxindex)
+{
+ struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+
+ *nodep = node;
+
+ if (likely(radix_tree_is_internal_node(node))) {
+ node = entry_to_node(node);
+ *maxindex = node_maxindex(node);
+ return node->shift + RADIX_TREE_MAP_SHIFT;
+ }
+
+ *maxindex = 0;
+ return 0;
}
/*
* Extend a radix tree so it can store key @index.
*/
static int radix_tree_extend(struct radix_tree_root *root,
- unsigned long index, unsigned order)
+ unsigned long index, unsigned int shift)
{
- struct radix_tree_node *node;
struct radix_tree_node *slot;
- unsigned int height;
+ unsigned int maxshift;
int tag;
- /* Figure out what the height should be. */
- height = root->height + 1;
- while (index > radix_tree_maxindex(height))
- height++;
+ /* Figure out what the shift should be. */
+ maxshift = shift;
+ while (index > shift_maxindex(maxshift))
+ maxshift += RADIX_TREE_MAP_SHIFT;
- if ((root->rnode == NULL) && (order == 0)) {
- root->height = height;
+ slot = root->rnode;
+ if (!slot)
goto out;
- }
do {
- unsigned int newheight;
- if (!(node = radix_tree_node_alloc(root)))
+ struct radix_tree_node *node = radix_tree_node_alloc(root);
+
+ if (!node)
return -ENOMEM;
/* Propagate the aggregated tag info into the new root */
@@ -397,25 +517,20 @@ static int radix_tree_extend(struct radix_tree_root *root,
tag_set(node, tag, 0);
}
- /* Increase the height. */
- newheight = root->height+1;
- BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK);
- node->path = newheight;
+ BUG_ON(shift > BITS_PER_LONG);
+ node->shift = shift;
+ node->offset = 0;
node->count = 1;
node->parent = NULL;
- slot = root->rnode;
- if (radix_tree_is_indirect_ptr(slot) && newheight > 1) {
- slot = indirect_to_ptr(slot);
- slot->parent = node;
- slot = ptr_to_indirect(slot);
- }
+ if (radix_tree_is_internal_node(slot))
+ entry_to_node(slot)->parent = node;
node->slots[0] = slot;
- node = ptr_to_indirect(node);
- rcu_assign_pointer(root->rnode, node);
- root->height = newheight;
- } while (height > root->height);
+ slot = node_to_entry(node);
+ rcu_assign_pointer(root->rnode, slot);
+ shift += RADIX_TREE_MAP_SHIFT;
+ } while (shift <= maxshift);
out:
- return 0;
+ return maxshift + RADIX_TREE_MAP_SHIFT;
}
/**
@@ -439,71 +554,70 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
unsigned order, struct radix_tree_node **nodep,
void ***slotp)
{
- struct radix_tree_node *node = NULL, *slot;
- unsigned int height, shift, offset;
- int error;
+ struct radix_tree_node *node = NULL, *child;
+ void **slot = (void **)&root->rnode;
+ unsigned long maxindex;
+ unsigned int shift, offset = 0;
+ unsigned long max = index | ((1UL << order) - 1);
- BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT));
+ shift = radix_tree_load_root(root, &child, &maxindex);
/* Make sure the tree is high enough. */
- if (index > radix_tree_maxindex(root->height)) {
- error = radix_tree_extend(root, index, order);
- if (error)
+ if (max > maxindex) {
+ int error = radix_tree_extend(root, max, shift);
+ if (error < 0)
return error;
+ shift = error;
+ child = root->rnode;
+ if (order == shift)
+ shift += RADIX_TREE_MAP_SHIFT;
}
- slot = root->rnode;
-
- height = root->height;
- shift = height * RADIX_TREE_MAP_SHIFT;
-
- offset = 0; /* uninitialised var warning */
while (shift > order) {
- if (slot == NULL) {
+ shift -= RADIX_TREE_MAP_SHIFT;
+ if (child == NULL) {
/* Have to add a child node. */
- if (!(slot = radix_tree_node_alloc(root)))
+ child = radix_tree_node_alloc(root);
+ if (!child)
return -ENOMEM;
- slot->path = height;
- slot->parent = node;
- if (node) {
- rcu_assign_pointer(node->slots[offset],
- ptr_to_indirect(slot));
+ child->shift = shift;
+ child->offset = offset;
+ child->parent = node;
+ rcu_assign_pointer(*slot, node_to_entry(child));
+ if (node)
node->count++;
- slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT;
- } else
- rcu_assign_pointer(root->rnode,
- ptr_to_indirect(slot));
- } else if (!radix_tree_is_indirect_ptr(slot))
+ } else if (!radix_tree_is_internal_node(child))
break;
/* Go a level down */
- height--;
- shift -= RADIX_TREE_MAP_SHIFT;
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- node = indirect_to_ptr(slot);
- slot = node->slots[offset];
+ node = entry_to_node(child);
+ offset = radix_tree_descend(node, &child, index);
+ slot = &node->slots[offset];
}
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
/* Insert pointers to the canonical entry */
- if ((shift - order) > 0) {
- int i, n = 1 << (shift - order);
+ if (order > shift) {
+ unsigned i, n = 1 << (order - shift);
offset = offset & ~(n - 1);
- slot = ptr_to_indirect(&node->slots[offset]);
+ slot = &node->slots[offset];
+ child = node_to_entry(slot);
for (i = 0; i < n; i++) {
- if (node->slots[offset + i])
+ if (slot[i])
return -EEXIST;
}
for (i = 1; i < n; i++) {
- rcu_assign_pointer(node->slots[offset + i], slot);
+ rcu_assign_pointer(slot[i], child);
node->count++;
}
}
+#endif
if (nodep)
*nodep = node;
if (slotp)
- *slotp = node ? node->slots + offset : (void **)&root->rnode;
+ *slotp = slot;
return 0;
}
@@ -523,7 +637,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
void **slot;
int error;
- BUG_ON(radix_tree_is_indirect_ptr(item));
+ BUG_ON(radix_tree_is_internal_node(item));
error = __radix_tree_create(root, index, order, &node, &slot);
if (error)
@@ -533,12 +647,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
rcu_assign_pointer(*slot, item);
if (node) {
+ unsigned offset = get_slot_offset(node, slot);
node->count++;
- BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK));
- BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK));
+ BUG_ON(tag_get(node, 0, offset));
+ BUG_ON(tag_get(node, 1, offset));
+ BUG_ON(tag_get(node, 2, offset));
} else {
- BUG_ON(root_tag_get(root, 0));
- BUG_ON(root_tag_get(root, 1));
+ BUG_ON(root_tags_get(root));
}
return 0;
@@ -563,44 +678,25 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
struct radix_tree_node **nodep, void ***slotp)
{
struct radix_tree_node *node, *parent;
- unsigned int height, shift;
+ unsigned long maxindex;
void **slot;
- node = rcu_dereference_raw(root->rnode);
- if (node == NULL)
+ restart:
+ parent = NULL;
+ slot = (void **)&root->rnode;
+ radix_tree_load_root(root, &node, &maxindex);
+ if (index > maxindex)
return NULL;
- if (!radix_tree_is_indirect_ptr(node)) {
- if (index > 0)
- return NULL;
+ while (radix_tree_is_internal_node(node)) {
+ unsigned offset;
- if (nodep)
- *nodep = NULL;
- if (slotp)
- *slotp = (void **)&root->rnode;
- return node;
+ if (node == RADIX_TREE_RETRY)
+ goto restart;
+ parent = entry_to_node(node);
+ offset = radix_tree_descend(parent, &node, index);
+ slot = parent->slots + offset;
}
- node = indirect_to_ptr(node);
-
- height = node->path & RADIX_TREE_HEIGHT_MASK;
- if (index > radix_tree_maxindex(height))
- return NULL;
-
- shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
- do {
- parent = node;
- slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK);
- node = rcu_dereference_raw(*slot);
- if (node == NULL)
- return NULL;
- if (!radix_tree_is_indirect_ptr(node))
- break;
- node = indirect_to_ptr(node);
-
- shift -= RADIX_TREE_MAP_SHIFT;
- height--;
- } while (height > 0);
if (nodep)
*nodep = parent;
@@ -654,59 +750,72 @@ EXPORT_SYMBOL(radix_tree_lookup);
* radix_tree_tag_set - set a tag on a radix tree node
* @root: radix tree root
* @index: index key
- * @tag: tag index
+ * @tag: tag index
*
* Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
* corresponding to @index in the radix tree. From
* the root all the way down to the leaf node.
*
- * Returns the address of the tagged item. Setting a tag on a not-present
+ * Returns the address of the tagged item. Setting a tag on a not-present
* item is a bug.
*/
void *radix_tree_tag_set(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
- unsigned int height, shift;
- struct radix_tree_node *slot;
+ struct radix_tree_node *node, *parent;
+ unsigned long maxindex;
- height = root->height;
- BUG_ON(index > radix_tree_maxindex(height));
+ radix_tree_load_root(root, &node, &maxindex);
+ BUG_ON(index > maxindex);
- slot = indirect_to_ptr(root->rnode);
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ while (radix_tree_is_internal_node(node)) {
+ unsigned offset;
- while (height > 0) {
- int offset;
+ parent = entry_to_node(node);
+ offset = radix_tree_descend(parent, &node, index);
+ BUG_ON(!node);
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- if (!tag_get(slot, tag, offset))
- tag_set(slot, tag, offset);
- slot = slot->slots[offset];
- BUG_ON(slot == NULL);
- if (!radix_tree_is_indirect_ptr(slot))
- break;
- slot = indirect_to_ptr(slot);
- shift -= RADIX_TREE_MAP_SHIFT;
- height--;
+ if (!tag_get(parent, tag, offset))
+ tag_set(parent, tag, offset);
}
/* set the root's tag bit */
- if (slot && !root_tag_get(root, tag))
+ if (!root_tag_get(root, tag))
root_tag_set(root, tag);
- return slot;
+ return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);
+static void node_tag_clear(struct radix_tree_root *root,
+ struct radix_tree_node *node,
+ unsigned int tag, unsigned int offset)
+{
+ while (node) {
+ if (!tag_get(node, tag, offset))
+ return;
+ tag_clear(node, tag, offset);
+ if (any_tag_set(node, tag))
+ return;
+
+ offset = node->offset;
+ node = node->parent;
+ }
+
+ /* clear the root's tag bit */
+ if (root_tag_get(root, tag))
+ root_tag_clear(root, tag);
+}
+
/**
* radix_tree_tag_clear - clear a tag on a radix tree node
* @root: radix tree root
* @index: index key
- * @tag: tag index
+ * @tag: tag index
*
* Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
- * corresponding to @index in the radix tree. If
- * this causes the leaf node to have no tags set then clear the tag in the
+ * corresponding to @index in the radix tree. If this causes
+ * the leaf node to have no tags set then clear the tag in the
* next-to-leaf node, etc.
*
* Returns the address of the tagged item on success, else NULL. ie:
@@ -715,52 +824,25 @@ EXPORT_SYMBOL(radix_tree_tag_set);
void *radix_tree_tag_clear(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
- struct radix_tree_node *node = NULL;
- struct radix_tree_node *slot = NULL;
- unsigned int height, shift;
+ struct radix_tree_node *node, *parent;
+ unsigned long maxindex;
int uninitialized_var(offset);
- height = root->height;
- if (index > radix_tree_maxindex(height))
- goto out;
-
- shift = height * RADIX_TREE_MAP_SHIFT;
- slot = root->rnode;
+ radix_tree_load_root(root, &node, &maxindex);
+ if (index > maxindex)
+ return NULL;
- while (shift) {
- if (slot == NULL)
- goto out;
- if (!radix_tree_is_indirect_ptr(slot))
- break;
- slot = indirect_to_ptr(slot);
+ parent = NULL;
- shift -= RADIX_TREE_MAP_SHIFT;
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- node = slot;
- slot = slot->slots[offset];
+ while (radix_tree_is_internal_node(node)) {
+ parent = entry_to_node(node);
+ offset = radix_tree_descend(parent, &node, index);
}
- if (slot == NULL)
- goto out;
+ if (node)
+ node_tag_clear(root, parent, tag, offset);
- while (node) {
- if (!tag_get(node, tag, offset))
- goto out;
- tag_clear(node, tag, offset);
- if (any_tag_set(node, tag))
- goto out;
-
- index >>= RADIX_TREE_MAP_SHIFT;
- offset = index & RADIX_TREE_MAP_MASK;
- node = node->parent;
- }
-
- /* clear the root's tag bit */
- if (root_tag_get(root, tag))
- root_tag_clear(root, tag);
-
-out:
- return slot;
+ return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);
@@ -768,7 +850,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
* radix_tree_tag_get - get a tag on a radix tree node
* @root: radix tree root
* @index: index key
- * @tag: tag index (< RADIX_TREE_MAX_TAGS)
+ * @tag: tag index (< RADIX_TREE_MAX_TAGS)
*
* Return values:
*
@@ -782,48 +864,44 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
int radix_tree_tag_get(struct radix_tree_root *root,
unsigned long index, unsigned int tag)
{
- unsigned int height, shift;
- struct radix_tree_node *node;
+ struct radix_tree_node *node, *parent;
+ unsigned long maxindex;
- /* check the root's tag bit */
if (!root_tag_get(root, tag))
return 0;
- node = rcu_dereference_raw(root->rnode);
- if (node == NULL)
+ radix_tree_load_root(root, &node, &maxindex);
+ if (index > maxindex)
return 0;
-
- if (!radix_tree_is_indirect_ptr(node))
- return (index == 0);
- node = indirect_to_ptr(node);
-
- height = node->path & RADIX_TREE_HEIGHT_MASK;
- if (index > radix_tree_maxindex(height))
+ if (node == NULL)
return 0;
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ while (radix_tree_is_internal_node(node)) {
+ unsigned offset;
- for ( ; ; ) {
- int offset;
+ parent = entry_to_node(node);
+ offset = radix_tree_descend(parent, &node, index);
- if (node == NULL)
+ if (!node)
return 0;
- node = indirect_to_ptr(node);
-
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- if (!tag_get(node, tag, offset))
+ if (!tag_get(parent, tag, offset))
return 0;
- if (height == 1)
- return 1;
- node = rcu_dereference_raw(node->slots[offset]);
- if (!radix_tree_is_indirect_ptr(node))
- return 1;
- shift -= RADIX_TREE_MAP_SHIFT;
- height--;
+ if (node == RADIX_TREE_RETRY)
+ break;
}
+
+ return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);
+static inline void __set_iter_shift(struct radix_tree_iter *iter,
+ unsigned int shift)
+{
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+ iter->shift = shift;
+#endif
+}
+
/**
* radix_tree_next_chunk - find next chunk of slots for iteration
*
@@ -835,9 +913,9 @@ EXPORT_SYMBOL(radix_tree_tag_get);
void **radix_tree_next_chunk(struct radix_tree_root *root,
struct radix_tree_iter *iter, unsigned flags)
{
- unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
- struct radix_tree_node *rnode, *node;
- unsigned long index, offset, height;
+ unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
+ struct radix_tree_node *node, *child;
+ unsigned long index, offset, maxindex;
if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
return NULL;
@@ -855,33 +933,28 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
if (!index && iter->index)
return NULL;
- rnode = rcu_dereference_raw(root->rnode);
- if (radix_tree_is_indirect_ptr(rnode)) {
- rnode = indirect_to_ptr(rnode);
- } else if (rnode && !index) {
+ restart:
+ radix_tree_load_root(root, &child, &maxindex);
+ if (index > maxindex)
+ return NULL;
+ if (!child)
+ return NULL;
+
+ if (!radix_tree_is_internal_node(child)) {
/* Single-slot tree */
- iter->index = 0;
- iter->next_index = 1;
+ iter->index = index;
+ iter->next_index = maxindex + 1;
iter->tags = 1;
+ __set_iter_shift(iter, 0);
return (void **)&root->rnode;
- } else
- return NULL;
-
-restart:
- height = rnode->path & RADIX_TREE_HEIGHT_MASK;
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
- offset = index >> shift;
+ }
- /* Index outside of the tree */
- if (offset >= RADIX_TREE_MAP_SIZE)
- return NULL;
+ do {
+ node = entry_to_node(child);
+ offset = radix_tree_descend(node, &child, index);
- node = rnode;
- while (1) {
- struct radix_tree_node *slot;
if ((flags & RADIX_TREE_ITER_TAGGED) ?
- !test_bit(offset, node->tags[tag]) :
- !node->slots[offset]) {
+ !tag_get(node, tag, offset) : !child) {
/* Hole detected */
if (flags & RADIX_TREE_ITER_CONTIG)
return NULL;
@@ -893,35 +966,30 @@ restart:
offset + 1);
else
while (++offset < RADIX_TREE_MAP_SIZE) {
- if (node->slots[offset])
+ void *slot = node->slots[offset];
+ if (is_sibling_entry(node, slot))
+ continue;
+ if (slot)
break;
}
- index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
- index += offset << shift;
+ index &= ~node_maxindex(node);
+ index += offset << node->shift;
/* Overflow after ~0UL */
if (!index)
return NULL;
if (offset == RADIX_TREE_MAP_SIZE)
goto restart;
+ child = rcu_dereference_raw(node->slots[offset]);
}
- /* This is leaf-node */
- if (!shift)
- break;
-
- slot = rcu_dereference_raw(node->slots[offset]);
- if (slot == NULL)
+ if ((child == NULL) || (child == RADIX_TREE_RETRY))
goto restart;
- if (!radix_tree_is_indirect_ptr(slot))
- break;
- node = indirect_to_ptr(slot);
- shift -= RADIX_TREE_MAP_SHIFT;
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- }
+ } while (radix_tree_is_internal_node(child));
/* Update the iterator state */
- iter->index = index;
- iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
+ iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
+ iter->next_index = (index | node_maxindex(node)) + 1;
+ __set_iter_shift(iter, node->shift);
/* Construct iter->tags bit-mask from node->tags[tag] array */
if (flags & RADIX_TREE_ITER_TAGGED) {
@@ -967,7 +1035,7 @@ EXPORT_SYMBOL(radix_tree_next_chunk);
* set is outside the range we are scanning. This reults in dangling tags and
* can lead to problems with later tag operations (e.g. livelocks on lookups).
*
- * The function returns number of leaves where the tag was set and sets
+ * The function returns the number of leaves where the tag was set and sets
* *first_indexp to the first unscanned index.
* WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
* be prepared to handle that.
@@ -977,14 +1045,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long nr_to_tag,
unsigned int iftag, unsigned int settag)
{
- unsigned int height = root->height;
- struct radix_tree_node *node = NULL;
- struct radix_tree_node *slot;
- unsigned int shift;
+ struct radix_tree_node *parent, *node, *child;
+ unsigned long maxindex;
unsigned long tagged = 0;
unsigned long index = *first_indexp;
- last_index = min(last_index, radix_tree_maxindex(height));
+ radix_tree_load_root(root, &child, &maxindex);
+ last_index = min(last_index, maxindex);
if (index > last_index)
return 0;
if (!nr_to_tag)
@@ -993,80 +1060,62 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
*first_indexp = last_index + 1;
return 0;
}
- if (height == 0) {
+ if (!radix_tree_is_internal_node(child)) {
*first_indexp = last_index + 1;
root_tag_set(root, settag);
return 1;
}
- shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
- slot = indirect_to_ptr(root->rnode);
+ node = entry_to_node(child);
for (;;) {
- unsigned long upindex;
- int offset;
-
- offset = (index >> shift) & RADIX_TREE_MAP_MASK;
- if (!slot->slots[offset])
+ unsigned offset = radix_tree_descend(node, &child, index);
+ if (!child)
goto next;
- if (!tag_get(slot, iftag, offset))
+ if (!tag_get(node, iftag, offset))
goto next;
- if (shift) {
- node = slot;
- slot = slot->slots[offset];
- if (radix_tree_is_indirect_ptr(slot)) {
- slot = indirect_to_ptr(slot);
- shift -= RADIX_TREE_MAP_SHIFT;
- continue;
- } else {
- slot = node;
- node = node->parent;
- }
+ /* Sibling slots never have tags set on them */
+ if (radix_tree_is_internal_node(child)) {
+ node = entry_to_node(child);
+ continue;
}
/* tag the leaf */
- tagged += 1 << shift;
- tag_set(slot, settag, offset);
+ tagged++;
+ tag_set(node, settag, offset);
/* walk back up the path tagging interior nodes */
- upindex = index;
- while (node) {
- upindex >>= RADIX_TREE_MAP_SHIFT;
- offset = upindex & RADIX_TREE_MAP_MASK;
-
+ parent = node;
+ for (;;) {
+ offset = parent->offset;
+ parent = parent->parent;
+ if (!parent)
+ break;
/* stop if we find a node with the tag already set */
- if (tag_get(node, settag, offset))
+ if (tag_get(parent, settag, offset))
break;
- tag_set(node, settag, offset);
- node = node->parent;
+ tag_set(parent, settag, offset);
}
-
- /*
- * Small optimization: now clear that node pointer.
- * Since all of this slot's ancestors now have the tag set
- * from setting it above, we have no further need to walk
- * back up the tree setting tags, until we update slot to
- * point to another radix_tree_node.
- */
- node = NULL;
-
-next:
- /* Go to next item at level determined by 'shift' */
- index = ((index >> shift) + 1) << shift;
+ next:
+ /* Go to next entry in node */
+ index = ((index >> node->shift) + 1) << node->shift;
/* Overflow can happen when last_index is ~0UL... */
if (index > last_index || !index)
break;
- if (tagged >= nr_to_tag)
- break;
- while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
+ offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
+ while (offset == 0) {
/*
* We've fully scanned this node. Go up. Because
* last_index is guaranteed to be in the tree, what
* we do below cannot wander astray.
*/
- slot = slot->parent;
- shift += RADIX_TREE_MAP_SHIFT;
+ node = node->parent;
+ offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
}
+ if (is_sibling_entry(node, node->slots[offset]))
+ goto next;
+ if (tagged >= nr_to_tag)
+ break;
}
/*
* We need not to tag the root tag if there is no tag which is set with
@@ -1095,9 +1144,10 @@ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
*
* Like radix_tree_lookup, radix_tree_gang_lookup may be called under
* rcu_read_lock. In this case, rather than the returned results being
- * an atomic snapshot of the tree at a single point in time, the semantics
- * of an RCU protected gang lookup are as though multiple radix_tree_lookups
- * have been issued in individual locks, and results stored in 'results'.
+ * an atomic snapshot of the tree at a single point in time, the
+ * semantics of an RCU protected gang lookup are as though multiple
+ * radix_tree_lookups have been issued in individual locks, and results
+ * stored in 'results'.
*/
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
@@ -1114,7 +1164,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
results[ret] = rcu_dereference_raw(*slot);
if (!results[ret])
continue;
- if (radix_tree_is_indirect_ptr(results[ret])) {
+ if (radix_tree_is_internal_node(results[ret])) {
slot = radix_tree_iter_retry(&iter);
continue;
}
@@ -1197,7 +1247,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
results[ret] = rcu_dereference_raw(*slot);
if (!results[ret])
continue;
- if (radix_tree_is_indirect_ptr(results[ret])) {
+ if (radix_tree_is_internal_node(results[ret])) {
slot = radix_tree_iter_retry(&iter);
continue;
}
@@ -1247,58 +1297,48 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
#include <linux/sched.h> /* for cond_resched() */
+struct locate_info {
+ unsigned long found_index;
+ bool stop;
+};
+
/*
* This linear search is at present only useful to shmem_unuse_inode().
*/
static unsigned long __locate(struct radix_tree_node *slot, void *item,
- unsigned long index, unsigned long *found_index)
+ unsigned long index, struct locate_info *info)
{
- unsigned int shift, height;
unsigned long i;
- height = slot->path & RADIX_TREE_HEIGHT_MASK;
- shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-
- for ( ; height > 1; height--) {
- i = (index >> shift) & RADIX_TREE_MAP_MASK;
- for (;;) {
- if (slot->slots[i] != NULL)
- break;
- index &= ~((1UL << shift) - 1);
- index += 1UL << shift;
- if (index == 0)
- goto out; /* 32-bit wraparound */
- i++;
- if (i == RADIX_TREE_MAP_SIZE)
+ do {
+ unsigned int shift = slot->shift;
+
+ for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
+ i < RADIX_TREE_MAP_SIZE;
+ i++, index += (1UL << shift)) {
+ struct radix_tree_node *node =
+ rcu_dereference_raw(slot->slots[i]);
+ if (node == RADIX_TREE_RETRY)
goto out;
- }
-
- slot = rcu_dereference_raw(slot->slots[i]);
- if (slot == NULL)
- goto out;
- if (!radix_tree_is_indirect_ptr(slot)) {
- if (slot == item) {
- *found_index = index + i;
- index = 0;
- } else {
- index += shift;
+ if (!radix_tree_is_internal_node(node)) {
+ if (node == item) {
+ info->found_index = index;
+ info->stop = true;
+ goto out;
+ }
+ continue;
}
- goto out;
+ node = entry_to_node(node);
+ if (is_sibling_entry(slot, node))
+ continue;
+ slot = node;
+ break;
}
- slot = indirect_to_ptr(slot);
- shift -= RADIX_TREE_MAP_SHIFT;
- }
+ } while (i < RADIX_TREE_MAP_SIZE);
- /* Bottom level: check items */
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
- if (slot->slots[i] == item) {
- *found_index = index + i;
- index = 0;
- goto out;
- }
- }
- index += RADIX_TREE_MAP_SIZE;
out:
+ if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
+ info->stop = true;
return index;
}
@@ -1316,32 +1356,35 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
struct radix_tree_node *node;
unsigned long max_index;
unsigned long cur_index = 0;
- unsigned long found_index = -1;
+ struct locate_info info = {
+ .found_index = -1,
+ .stop = false,
+ };
do {
rcu_read_lock();
node = rcu_dereference_raw(root->rnode);
- if (!radix_tree_is_indirect_ptr(node)) {
+ if (!radix_tree_is_internal_node(node)) {
rcu_read_unlock();
if (node == item)
- found_index = 0;
+ info.found_index = 0;
break;
}
- node = indirect_to_ptr(node);
- max_index = radix_tree_maxindex(node->path &
- RADIX_TREE_HEIGHT_MASK);
+ node = entry_to_node(node);
+
+ max_index = node_maxindex(node);
if (cur_index > max_index) {
rcu_read_unlock();
break;
}
- cur_index = __locate(node, item, cur_index, &found_index);
+ cur_index = __locate(node, item, cur_index, &info);
rcu_read_unlock();
cond_resched();
- } while (cur_index != 0 && cur_index <= max_index);
+ } while (!info.stop && cur_index <= max_index);
- return found_index;
+ return info.found_index;
}
#else
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
@@ -1351,47 +1394,45 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
- * radix_tree_shrink - shrink height of a radix tree to minimal
+ * radix_tree_shrink - shrink radix tree to minimum height
* @root radix tree root
*/
-static inline void radix_tree_shrink(struct radix_tree_root *root)
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
- /* try to shrink tree height */
- while (root->height > 0) {
- struct radix_tree_node *to_free = root->rnode;
- struct radix_tree_node *slot;
+ bool shrunk = false;
+
+ for (;;) {
+ struct radix_tree_node *node = root->rnode;
+ struct radix_tree_node *child;
- BUG_ON(!radix_tree_is_indirect_ptr(to_free));
- to_free = indirect_to_ptr(to_free);
+ if (!radix_tree_is_internal_node(node))
+ break;
+ node = entry_to_node(node);
/*
* The candidate node has more than one child, or its child
- * is not at the leftmost slot, or it is a multiorder entry,
- * we cannot shrink.
+ * is not at the leftmost slot, or the child is a multiorder
+ * entry, we cannot shrink.
*/
- if (to_free->count != 1)
+ if (node->count != 1)
+ break;
+ child = node->slots[0];
+ if (!child)
break;
- slot = to_free->slots[0];
- if (!slot)
+ if (!radix_tree_is_internal_node(child) && node->shift)
break;
+ if (radix_tree_is_internal_node(child))
+ entry_to_node(child)->parent = NULL;
+
/*
* We don't need rcu_assign_pointer(), since we are simply
* moving the node from one part of the tree to another: if it
* was safe to dereference the old pointer to it
- * (to_free->slots[0]), it will be safe to dereference the new
+ * (node->slots[0]), it will be safe to dereference the new
* one (root->rnode) as far as dependent read barriers go.
*/
- if (root->height > 1) {
- if (!radix_tree_is_indirect_ptr(slot))
- break;
-
- slot = indirect_to_ptr(slot);
- slot->parent = NULL;
- slot = ptr_to_indirect(slot);
- }
- root->rnode = slot;
- root->height--;
+ root->rnode = child;
/*
* We have a dilemma here. The node's slot[0] must not be
@@ -1403,7 +1444,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
* their slot to become empty sooner or later.
*
* For example, lockless pagecache will look up a slot, deref
- * the page pointer, and if the page is 0 refcount it means it
+ * the page pointer, and if the page has 0 refcount it means it
* was concurrently deleted from pagecache so try the deref
* again. Fortunately there is already a requirement for logic
* to retry the entire slot lookup -- the indirect pointer
@@ -1411,12 +1452,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
* also results in a stale slot). So tag the slot as indirect
* to force callers to retry.
*/
- if (root->height == 0)
- *((unsigned long *)&to_free->slots[0]) |=
- RADIX_TREE_INDIRECT_PTR;
+ if (!radix_tree_is_internal_node(child))
+ node->slots[0] = RADIX_TREE_RETRY;
- radix_tree_node_free(to_free);
+ radix_tree_node_free(node);
+ shrunk = true;
}
+
+ return shrunk;
}
/**
@@ -1439,24 +1482,17 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
struct radix_tree_node *parent;
if (node->count) {
- if (node == indirect_to_ptr(root->rnode)) {
- radix_tree_shrink(root);
- if (root->height == 0)
- deleted = true;
- }
+ if (node == entry_to_node(root->rnode))
+ deleted |= radix_tree_shrink(root);
return deleted;
}
parent = node->parent;
if (parent) {
- unsigned int offset;
-
- offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
- parent->slots[offset] = NULL;
+ parent->slots[node->offset] = NULL;
parent->count--;
} else {
root_tag_clear_all(root);
- root->height = 0;
root->rnode = NULL;
}
@@ -1469,6 +1505,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
return deleted;
}
+static inline void delete_sibling_entries(struct radix_tree_node *node,
+ void *ptr, unsigned offset)
+{
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+ int i;
+ for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
+ if (node->slots[offset + i] != ptr)
+ break;
+ node->slots[offset + i] = NULL;
+ node->count--;
+ }
+#endif
+}
+
/**
* radix_tree_delete_item - delete an item from a radix tree
* @root: radix tree root
@@ -1484,7 +1534,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
unsigned long index, void *item)
{
struct radix_tree_node *node;
- unsigned int offset, i;
+ unsigned int offset;
void **slot;
void *entry;
int tag;
@@ -1502,24 +1552,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
return entry;
}
- offset = index & RADIX_TREE_MAP_MASK;
+ offset = get_slot_offset(node, slot);
- /*
- * Clear all tags associated with the item to be deleted.
- * This way of doing it would be inefficient, but seldom is any set.
- */
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
- if (tag_get(node, tag, offset))
- radix_tree_tag_clear(root, index, tag);
- }
+ /* Clear all tags associated with the item to be deleted. */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ node_tag_clear(root, node, tag, offset);
- /* Delete any sibling slots pointing to this slot */
- for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
- if (node->slots[offset + i] != ptr_to_indirect(slot))
- break;
- node->slots[offset + i] = NULL;
- node->count--;
- }
+ delete_sibling_entries(node, node_to_entry(slot), offset);
node->slots[offset] = NULL;
node->count--;
@@ -1544,6 +1583,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_delete);
+void radix_tree_clear_tags(struct radix_tree_root *root,
+ struct radix_tree_node *node,
+ void **slot)
+{
+ if (node) {
+ unsigned int tag, offset = get_slot_offset(node, slot);
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+ node_tag_clear(root, node, tag, offset);
+ } else {
+ /* Clear root node tags */
+ root->gfp_mask &= __GFP_BITS_MASK;
+ }
+}
+
/**
* radix_tree_tagged - test whether any items in the tree are tagged
* @root: radix tree root
@@ -1576,33 +1629,37 @@ static __init unsigned long __maxindex(unsigned int height)
return ~0UL >> shift;
}
-static __init void radix_tree_init_maxindex(void)
+static __init void radix_tree_init_maxnodes(void)
{
- unsigned int i;
+ unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
+ unsigned int i, j;
for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
height_to_maxindex[i] = __maxindex(i);
+ for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
+ for (j = i; j > 0; j--)
+ height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
+ }
}
static int radix_tree_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ unsigned long action, void *hcpu)
{
- int cpu = (long)hcpu;
- struct radix_tree_preload *rtp;
- struct radix_tree_node *node;
-
- /* Free per-cpu pool of perloaded nodes */
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- rtp = &per_cpu(radix_tree_preloads, cpu);
- while (rtp->nr) {
+ int cpu = (long)hcpu;
+ struct radix_tree_preload *rtp;
+ struct radix_tree_node *node;
+
+ /* Free per-cpu pool of preloaded nodes */
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+ rtp = &per_cpu(radix_tree_preloads, cpu);
+ while (rtp->nr) {
node = rtp->nodes;
rtp->nodes = node->private_data;
kmem_cache_free(radix_tree_node_cachep, node);
rtp->nr--;
- }
- }
- return NOTIFY_OK;
+ }
+ }
+ return NOTIFY_OK;
}
void __init radix_tree_init(void)
@@ -1611,6 +1668,6 @@ void __init radix_tree_init(void)
sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
radix_tree_node_ctor);
- radix_tree_init_maxindex();
+ radix_tree_init_maxnodes();
hotcpu_notifier(radix_tree_callback, 0);
}
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494b2bcd..f01b1cb04f91 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
int*.c
tables.c
neon?.c
+s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48fa040..3057011f5599 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
int8.o int16.o int32.o
-raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
+raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
+raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
hostprogs-y += mktables
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8
$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+targets += s390vx8.c
+$(obj)/s390vx8.c: UNROLL := 8
+$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
quiet_cmd_mktable = TABLE $@
cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0434bd..7857049fd7d3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_avx2x1,
&raid6_avx2x2,
#endif
+#ifdef CONFIG_AS_AVX512
+ &raid6_avx512x1,
+ &raid6_avx512x2,
+#endif
#endif
#if defined(__x86_64__) && !defined(__arch_um__)
&raid6_sse2x1,
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_avx2x2,
&raid6_avx2x4,
#endif
+#ifdef CONFIG_AS_AVX512
+ &raid6_avx512x1,
+ &raid6_avx512x2,
+ &raid6_avx512x4,
+#endif
#endif
#ifdef CONFIG_ALTIVEC
&raid6_altivec1,
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = {
#if defined(CONFIG_TILEGX)
&raid6_tilegx8,
#endif
+#if defined(CONFIG_S390)
+ &raid6_s390vx8,
+#endif
&raid6_intx1,
&raid6_intx2,
&raid6_intx4,
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
EXPORT_SYMBOL_GPL(raid6_datap_recov);
const struct raid6_recov_calls *const raid6_recov_algos[] = {
+#ifdef CONFIG_AS_AVX512
+ &raid6_recov_avx512,
+#endif
#ifdef CONFIG_AS_AVX2
&raid6_recov_avx2,
#endif
#ifdef CONFIG_AS_SSSE3
&raid6_recov_ssse3,
#endif
+#ifdef CONFIG_S390
+ &raid6_recov_s390xc,
+#endif
&raid6_recov_intx1,
NULL
};
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
new file mode 100644
index 000000000000..f524a7972006
--- /dev/null
+++ b/lib/raid6/avx512.c
@@ -0,0 +1,569 @@
+/* -*- linux-c -*- --------------------------------------------------------
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Gayatri Kammela <gayatri.kammela@intel.com>
+ * Author: Megha Dey <megha.dey@linux.intel.com>
+ *
+ * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
+ * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * AVX512 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#ifdef CONFIG_AS_AVX512
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_avx512_constants {
+ u64 x1d[8];
+} raid6_avx512_constants __aligned(512) = {
+ { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
+ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
+};
+
+static int raid6_have_avx512(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512DQ);
+}
+
+static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0\n\t"
+ "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
+ :
+ : "m" (raid6_avx512_constants.x1d[0]));
+
+ for (d = 0; d < bytes; d += 64) {
+ asm volatile("prefetchnta %0\n\t"
+ "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
+ "prefetchnta %1\n\t"
+ "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
+ "vmovdqa64 %1,%%zmm6"
+ :
+ : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
+ for (z = z0-2; z >= 0; z--) {
+ asm volatile("prefetchnta %0\n\t"
+ "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
+ "vmovdqa64 %0,%%zmm6"
+ :
+ : "m" (dptr[z][d]));
+ }
+ asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
+ "vmovntdq %%zmm2,%0\n\t"
+ "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
+ "vmovntdq %%zmm4,%1\n\t"
+ "vpxorq %%zmm4,%%zmm4,%%zmm4"
+ :
+ : "m" (p[d]), "m" (q[d]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0"
+ : : "m" (raid6_avx512_constants.x1d[0]));
+
+ for (d = 0 ; d < bytes ; d += 64) {
+ asm volatile("vmovdqa64 %0,%%zmm4\n\t"
+ "vmovdqa64 %1,%%zmm2\n\t"
+ "vpxorq %%zmm4,%%zmm2,%%zmm2"
+ :
+ : "m" (dptr[z0][d]), "m" (p[d]));
+ /* P/Q data pages */
+ for (z = z0-1 ; z >= start ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vmovdqa64 %0,%%zmm5\n\t"
+ "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4"
+ :
+ : "m" (dptr[z][d]));
+ }
+ /* P/Q left side optimization */
+ for (z = start-1 ; z >= 0 ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4"
+ :
+ : );
+ }
+ asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
+ /* Don't use movntdq for r/w memory area < cache line */
+ "vmovdqa64 %%zmm4,%0\n\t"
+ "vmovdqa64 %%zmm2,%1"
+ :
+ : "m" (q[d]), "m" (p[d]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx512x1 = {
+ raid6_avx5121_gen_syndrome,
+ raid6_avx5121_xor_syndrome,
+ raid6_have_avx512,
+ "avx512x1",
+ 1 /* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 AVX512 implementation
+ */
+static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0\n\t"
+ "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
+ :
+ : "m" (raid6_avx512_constants.x1d[0]));
+
+ /* We uniformly assume a single prefetch covers at least 64 bytes */
+ for (d = 0; d < bytes; d += 128) {
+ asm volatile("prefetchnta %0\n\t"
+ "prefetchnta %1\n\t"
+ "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
+ "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
+ "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
+ "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
+ :
+ : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
+ for (z = z0-1; z >= 0; z--) {
+ asm volatile("prefetchnta %0\n\t"
+ "prefetchnta %1\n\t"
+ "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vmovdqa64 %0,%%zmm5\n\t"
+ "vmovdqa64 %1,%%zmm7\n\t"
+ "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6"
+ :
+ : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
+ }
+ asm volatile("vmovntdq %%zmm2,%0\n\t"
+ "vmovntdq %%zmm3,%1\n\t"
+ "vmovntdq %%zmm4,%2\n\t"
+ "vmovntdq %%zmm6,%3"
+ :
+ : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
+ "m" (q[d+64]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0"
+ : : "m" (raid6_avx512_constants.x1d[0]));
+
+ for (d = 0 ; d < bytes ; d += 128) {
+ asm volatile("vmovdqa64 %0,%%zmm4\n\t"
+ "vmovdqa64 %1,%%zmm6\n\t"
+ "vmovdqa64 %2,%%zmm2\n\t"
+ "vmovdqa64 %3,%%zmm3\n\t"
+ "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm6,%%zmm3,%%zmm3"
+ :
+ : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
+ "m" (p[d]), "m" (p[d+64]));
+ /* P/Q data pages */
+ for (z = z0-1 ; z >= start ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vmovdqa64 %0,%%zmm5\n\t"
+ "vmovdqa64 %1,%%zmm7\n\t"
+ "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6"
+ :
+ : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
+ }
+ /* P/Q left side optimization */
+ for (z = start-1 ; z >= 0 ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6"
+ :
+ : );
+ }
+ asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
+ "vpxorq %1,%%zmm6,%%zmm6\n\t"
+ /* Don't use movntdq for r/w
+ * memory area < cache line
+ */
+ "vmovdqa64 %%zmm4,%0\n\t"
+ "vmovdqa64 %%zmm6,%1\n\t"
+ "vmovdqa64 %%zmm2,%2\n\t"
+ "vmovdqa64 %%zmm3,%3"
+ :
+ : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
+ "m" (p[d+64]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_avx512x2 = {
+ raid6_avx5122_gen_syndrome,
+ raid6_avx5122_xor_syndrome,
+ raid6_have_avx512,
+ "avx512x2",
+ 1 /* Has cache hints */
+};
+
+#ifdef CONFIG_X86_64
+
+/*
+ * Unrolled-by-4 AVX2 implementation
+ */
+static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0\n\t"
+ "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
+ "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
+ "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
+ "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
+ "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
+ "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
+ "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
+ "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
+ "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
+ :
+ : "m" (raid6_avx512_constants.x1d[0]));
+
+ for (d = 0; d < bytes; d += 256) {
+ for (z = z0; z >= 0; z--) {
+ asm volatile("prefetchnta %0\n\t"
+ "prefetchnta %1\n\t"
+ "prefetchnta %2\n\t"
+ "prefetchnta %3\n\t"
+ "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
+ "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
+ "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpmovm2b %%k3,%%zmm13\n\t"
+ "vpmovm2b %%k4,%%zmm15\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
+ "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
+ "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
+ "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
+ "vmovdqa64 %0,%%zmm5\n\t"
+ "vmovdqa64 %1,%%zmm7\n\t"
+ "vmovdqa64 %2,%%zmm13\n\t"
+ "vmovdqa64 %3,%%zmm15\n\t"
+ "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
+ "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
+ "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
+ "vpxorq %%zmm15,%%zmm14,%%zmm14"
+ :
+ : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
+ "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
+ }
+ asm volatile("vmovntdq %%zmm2,%0\n\t"
+ "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
+ "vmovntdq %%zmm3,%1\n\t"
+ "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
+ "vmovntdq %%zmm10,%2\n\t"
+ "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
+ "vmovntdq %%zmm11,%3\n\t"
+ "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
+ "vmovntdq %%zmm4,%4\n\t"
+ "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vmovntdq %%zmm6,%5\n\t"
+ "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vmovntdq %%zmm12,%6\n\t"
+ "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
+ "vmovntdq %%zmm14,%7\n\t"
+ "vpxorq %%zmm14,%%zmm14,%%zmm14"
+ :
+ : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
+ "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
+ "m" (q[d+128]), "m" (q[d+192]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("vmovdqa64 %0,%%zmm0"
+ :: "m" (raid6_avx512_constants.x1d[0]));
+
+ for (d = 0 ; d < bytes ; d += 256) {
+ asm volatile("vmovdqa64 %0,%%zmm4\n\t"
+ "vmovdqa64 %1,%%zmm6\n\t"
+ "vmovdqa64 %2,%%zmm12\n\t"
+ "vmovdqa64 %3,%%zmm14\n\t"
+ "vmovdqa64 %4,%%zmm2\n\t"
+ "vmovdqa64 %5,%%zmm3\n\t"
+ "vmovdqa64 %6,%%zmm10\n\t"
+ "vmovdqa64 %7,%%zmm11\n\t"
+ "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
+ "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
+ "vpxorq %%zmm14,%%zmm11,%%zmm11"
+ :
+ : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
+ "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
+ "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
+ "m" (p[d+192]));
+ /* P/Q data pages */
+ for (z = z0-1 ; z >= start ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
+ "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
+ "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
+ "prefetchnta %0\n\t"
+ "prefetchnta %2\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
+ "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
+ "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpmovm2b %%k3,%%zmm13\n\t"
+ "vpmovm2b %%k4,%%zmm15\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
+ "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
+ "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
+ "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
+ "vmovdqa64 %0,%%zmm5\n\t"
+ "vmovdqa64 %1,%%zmm7\n\t"
+ "vmovdqa64 %2,%%zmm13\n\t"
+ "vmovdqa64 %3,%%zmm15\n\t"
+ "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
+ "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
+ "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
+ "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
+ "vpxorq %%zmm15,%%zmm14,%%zmm14"
+ :
+ : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
+ "m" (dptr[z][d+128]),
+ "m" (dptr[z][d+192]));
+ }
+ asm volatile("prefetchnta %0\n\t"
+ "prefetchnta %1\n\t"
+ :
+ : "m" (q[d]), "m" (q[d+128]));
+ /* P/Q left side optimization */
+ for (z = start-1 ; z >= 0 ; z--) {
+ asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
+ "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
+ "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
+ "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
+ "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
+ "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
+ "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
+ "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
+ "vpmovm2b %%k1,%%zmm5\n\t"
+ "vpmovm2b %%k2,%%zmm7\n\t"
+ "vpmovm2b %%k3,%%zmm13\n\t"
+ "vpmovm2b %%k4,%%zmm15\n\t"
+ "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
+ "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
+ "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
+ "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
+ "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
+ "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
+ "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
+ "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
+ "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
+ "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
+ "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
+ "vpxorq %%zmm15,%%zmm14,%%zmm14"
+ :
+ : );
+ }
+ asm volatile("vmovntdq %%zmm2,%0\n\t"
+ "vmovntdq %%zmm3,%1\n\t"
+ "vmovntdq %%zmm10,%2\n\t"
+ "vmovntdq %%zmm11,%3\n\t"
+ "vpxorq %4,%%zmm4,%%zmm4\n\t"
+ "vpxorq %5,%%zmm6,%%zmm6\n\t"
+ "vpxorq %6,%%zmm12,%%zmm12\n\t"
+ "vpxorq %7,%%zmm14,%%zmm14\n\t"
+ "vmovntdq %%zmm4,%4\n\t"
+ "vmovntdq %%zmm6,%5\n\t"
+ "vmovntdq %%zmm12,%6\n\t"
+ "vmovntdq %%zmm14,%7"
+ :
+ : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
+ "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
+ "m" (q[d+128]), "m" (q[d+192]));
+ }
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+const struct raid6_calls raid6_avx512x4 = {
+ raid6_avx5124_gen_syndrome,
+ raid6_avx5124_xor_syndrome,
+ raid6_have_avx512,
+ "avx512x4",
+ 1 /* Has cache hints */
+};
+#endif
+
+#endif /* CONFIG_AS_AVX512 */
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c
new file mode 100644
index 000000000000..625aafa33b61
--- /dev/null
+++ b/lib/raid6/recov_avx512.c
@@ -0,0 +1,388 @@
+/*
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Gayatri Kammela <gayatri.kammela@intel.com>
+ * Author: Megha Dey <megha.dey@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#ifdef CONFIG_AS_AVX512
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_avx512(void)
+{
+ return boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512DQ);
+}
+
+static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks-2] = p;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /* zmm0 = x0f[16] */
+ asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa64 %0, %%zmm1\n\t"
+ "vmovdqa64 %1, %%zmm9\n\t"
+ "vmovdqa64 %2, %%zmm0\n\t"
+ "vmovdqa64 %3, %%zmm8\n\t"
+ "vpxorq %4, %%zmm1, %%zmm1\n\t"
+ "vpxorq %5, %%zmm9, %%zmm9\n\t"
+ "vpxorq %6, %%zmm0, %%zmm0\n\t"
+ "vpxorq %7, %%zmm8, %%zmm8"
+ :
+ : "m" (q[0]), "m" (q[64]), "m" (p[0]),
+ "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
+ "m" (dp[0]), "m" (dp[64]));
+
+ /*
+ * 1 = dq[0] ^ q[0]
+ * 9 = dq[64] ^ q[64]
+ * 0 = dp[0] ^ p[0]
+ * 8 = dp[64] ^ p[64]
+ */
+
+ asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
+ "vbroadcasti64x2 %1, %%zmm5"
+ :
+ : "m" (qmul[0]), "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
+ "vpsraw $4, %%zmm9, %%zmm12\n\t"
+ "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
+ "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
+ "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
+ "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
+ "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
+ "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
+ "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
+ "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
+ "vpxorq %%zmm4, %%zmm5, %%zmm5"
+ :
+ : );
+
+ /*
+ * 5 = qx[0]
+ * 15 = qx[64]
+ */
+
+ asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
+ "vbroadcasti64x2 %1, %%zmm1\n\t"
+ "vpsraw $4, %%zmm0, %%zmm2\n\t"
+ "vpsraw $4, %%zmm8, %%zmm6\n\t"
+ "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
+ "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
+ "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
+ "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
+ "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
+ "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
+ "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm12, %%zmm13, %%zmm13"
+ :
+ : "m" (pbmul[0]), "m" (pbmul[16]));
+
+ /*
+ * 1 = pbmul[px[0]]
+ * 13 = pbmul[px[64]]
+ */
+ asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm15, %%zmm13, %%zmm13"
+ :
+ : );
+
+ /*
+ * 1 = db = DQ
+ * 13 = db[64] = DQ[64]
+ */
+ asm volatile("vmovdqa64 %%zmm1, %0\n\t"
+ "vmovdqa64 %%zmm13,%1\n\t"
+ "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
+ "vpxorq %%zmm13, %%zmm8, %%zmm8"
+ :
+ : "m" (dq[0]), "m" (dq[64]));
+
+ asm volatile("vmovdqa64 %%zmm0, %0\n\t"
+ "vmovdqa64 %%zmm8, %1"
+ :
+ : "m" (dp[0]), "m" (dp[64]));
+
+ bytes -= 128;
+ p += 128;
+ q += 128;
+ dp += 128;
+ dq += 128;
+#else
+ asm volatile("vmovdqa64 %0, %%zmm1\n\t"
+ "vmovdqa64 %1, %%zmm0\n\t"
+ "vpxorq %2, %%zmm1, %%zmm1\n\t"
+ "vpxorq %3, %%zmm0, %%zmm0"
+ :
+ : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
+
+ /* 1 = dq ^ q; 0 = dp ^ p */
+
+ asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
+ "vbroadcasti64x2 %1, %%zmm5"
+ :
+ : "m" (qmul[0]), "m" (qmul[16]));
+
+ /*
+ * 1 = dq ^ q
+ * 3 = dq ^ p >> 4
+ */
+ asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
+ "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
+ "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
+ "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
+ "vpxorq %%zmm4, %%zmm5, %%zmm5"
+ :
+ : );
+
+ /* 5 = qx */
+
+ asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
+ "vbroadcasti64x2 %1, %%zmm1"
+ :
+ : "m" (pbmul[0]), "m" (pbmul[16]));
+
+ asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
+ "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
+ "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
+ "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm4, %%zmm1, %%zmm1"
+ :
+ : );
+
+ /* 1 = pbmul[px] */
+ asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
+ /* 1 = db = DQ */
+ "vmovdqa64 %%zmm1, %0\n\t"
+ :
+ : "m" (dq[0]));
+
+ asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
+ "vmovdqa64 %%zmm0, %0"
+ :
+ : "m" (dp[0]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+ const u8 x0f = 0x0f;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
+
+ while (bytes) {
+#ifdef CONFIG_X86_64
+ asm volatile("vmovdqa64 %0, %%zmm3\n\t"
+ "vmovdqa64 %1, %%zmm8\n\t"
+ "vpxorq %2, %%zmm3, %%zmm3\n\t"
+ "vpxorq %3, %%zmm8, %%zmm8"
+ :
+ : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
+ "m" (q[64]));
+
+ /*
+ * 3 = q[0] ^ dq[0]
+ * 8 = q[64] ^ dq[64]
+ */
+ asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
+ "vmovapd %%zmm0, %%zmm13\n\t"
+ "vbroadcasti64x2 %1, %%zmm1\n\t"
+ "vmovapd %%zmm1, %%zmm14"
+ :
+ : "m" (qmul[0]), "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
+ "vpsraw $4, %%zmm8, %%zmm12\n\t"
+ "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
+ "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
+ "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
+ "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
+ "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
+ "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
+ "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
+ "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm13, %%zmm14, %%zmm14"
+ :
+ : );
+
+ /*
+ * 1 = qmul[q[0] ^ dq[0]]
+ * 14 = qmul[q[64] ^ dq[64]]
+ */
+ asm volatile("vmovdqa64 %0, %%zmm2\n\t"
+ "vmovdqa64 %1, %%zmm12\n\t"
+ "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
+ "vpxorq %%zmm14, %%zmm12, %%zmm12"
+ :
+ : "m" (p[0]), "m" (p[64]));
+
+ /*
+ * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
+ * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
+ */
+
+ asm volatile("vmovdqa64 %%zmm1, %0\n\t"
+ "vmovdqa64 %%zmm14, %1\n\t"
+ "vmovdqa64 %%zmm2, %2\n\t"
+ "vmovdqa64 %%zmm12,%3"
+ :
+ : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
+ "m" (p[64]));
+
+ bytes -= 128;
+ p += 128;
+ q += 128;
+ dq += 128;
+#else
+ asm volatile("vmovdqa64 %0, %%zmm3\n\t"
+ "vpxorq %1, %%zmm3, %%zmm3"
+ :
+ : "m" (dq[0]), "m" (q[0]));
+
+ /* 3 = q ^ dq */
+
+ asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
+ "vbroadcasti64x2 %1, %%zmm1"
+ :
+ : "m" (qmul[0]), "m" (qmul[16]));
+
+ asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
+ "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
+ "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
+ "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
+ "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
+ "vpxorq %%zmm0, %%zmm1, %%zmm1"
+ :
+ : );
+
+ /* 1 = qmul[q ^ dq] */
+
+ asm volatile("vmovdqa64 %0, %%zmm2\n\t"
+ "vpxorq %%zmm1, %%zmm2, %%zmm2"
+ :
+ : "m" (p[0]));
+
+ /* 2 = p ^ qmul[q ^ dq] */
+
+ asm volatile("vmovdqa64 %%zmm1, %0\n\t"
+ "vmovdqa64 %%zmm2, %1"
+ :
+ : "m" (dq[0]), "m" (p[0]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+#endif
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_avx512 = {
+ .data2 = raid6_2data_recov_avx512,
+ .datap = raid6_datap_recov_avx512,
+ .valid = raid6_has_avx512,
+#ifdef CONFIG_X86_64
+ .name = "avx512x2",
+#else
+ .name = "avx512x1",
+#endif
+ .priority = 3,
+};
+
+#else
+#warning "your version of binutils lacks AVX512 support"
+#endif
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 000000000000..b042dac826cc
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
+/*
+ * RAID-6 data recovery in dual failure mode based on the XC instruction.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/raid/pq.h>
+
+static inline void xor_block(u8 *p1, u8 *p2)
+{
+ typedef struct { u8 _[256]; } addrtype;
+
+ asm volatile(
+ " xc 0(256,%[p1]),0(%[p2])\n"
+ : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
+ [p1] "a" (p1), [p2] "a" (p2) : "cc");
+}
+
+/* Recover two failed data blocks. */
+static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+ int i;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks-2] = p;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+ /* Now do it... */
+ while (bytes) {
+ xor_block(dp, p);
+ xor_block(dq, q);
+ for (i = 0; i < 256; i++)
+ dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
+ xor_block(dp, dq);
+ p += 256;
+ q += 256;
+ dp += 256;
+ dq += 256;
+ bytes -= 256;
+ }
+}
+
+/* Recover failure of one data block plus the P block */
+static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+ int i;
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ /* Now do it... */
+ while (bytes) {
+ xor_block(dq, q);
+ for (i = 0; i < 256; i++)
+ dq[i] = qmul[dq[i]];
+ xor_block(p, dq);
+ p += 256;
+ q += 256;
+ dq += 256;
+ bytes -= 256;
+ }
+}
+
+
+const struct raid6_recov_calls raid6_recov_s390xc = {
+ .data2 = raid6_2data_recov_s390xc,
+ .datap = raid6_datap_recov_s390xc,
+ .valid = NULL,
+ .name = "s390xc",
+ .priority = 1,
+};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 000000000000..7b45191a655f
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
+/*
+ * raid6_vx$#.c
+ *
+ * $#-way unrolled RAID6 gen/xor functions for s390
+ * based on the vector facility
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This file is postprocessed using unroll.awk.
+ */
+
+#include <linux/raid/pq.h>
+#include <asm/fpu/api.h>
+
+asm(".include \"asm/vx-insn.h\"\n");
+
+#define NSIZE 16
+
+static inline void LOAD_CONST(void)
+{
+ asm volatile("VREPIB %v24,7");
+ asm volatile("VREPIB %v25,0x1d");
+}
+
+/*
+ * The SHLBYTE() operation shifts each of the 16 bytes in
+ * vector register y left by 1 bit and stores the result in
+ * vector register x.
+ */
+static inline void SHLBYTE(int x, int y)
+{
+ asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
+}
+
+/*
+ * For each of the 16 bytes in the vector register y the MASK()
+ * operation returns 0xFF if the high bit of the byte is 1,
+ * or 0x00 if the high bit is 0. The result is stored in vector
+ * register x.
+ */
+static inline void MASK(int x, int y)
+{
+ asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
+}
+
+static inline void AND(int x, int y, int z)
+{
+ asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void XOR(int x, int y, int z)
+{
+ asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void LOAD_DATA(int x, int n, u8 *ptr)
+{
+ typedef struct { u8 _[16*n]; } addrtype;
+ register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+ asm volatile ("VLM %2,%3,0,%r1"
+ : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void STORE_DATA(int x, int n, u8 *ptr)
+{
+ typedef struct { u8 _[16*n]; } addrtype;
+ register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+ asm volatile ("VSTM %2,%3,0,1"
+ : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void COPY_VEC(int x, int y)
+{
+ asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
+}
+
+static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ struct kernel_fpu vxstate;
+ u8 **dptr, *p, *q;
+ int d, z, z0;
+
+ kernel_fpu_begin(&vxstate, KERNEL_VXR);
+ LOAD_CONST();
+
+ dptr = (u8 **) ptrs;
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0 + 1]; /* XOR parity */
+ q = dptr[z0 + 2]; /* RS syndrome */
+
+ for (d = 0; d < bytes; d += $#*NSIZE) {
+ LOAD_DATA(0,$#,&dptr[z0][d]);
+ COPY_VEC(8+$$,0+$$);
+ for (z = z0 - 1; z >= 0; z--) {
+ MASK(16+$$,8+$$);
+ AND(16+$$,16+$$,25);
+ SHLBYTE(8+$$,8+$$);
+ XOR(8+$$,8+$$,16+$$);
+ LOAD_DATA(16,$#,&dptr[z][d]);
+ XOR(0+$$,0+$$,16+$$);
+ XOR(8+$$,8+$$,16+$$);
+ }
+ STORE_DATA(0,$#,&p[d]);
+ STORE_DATA(8,$#,&q[d]);
+ }
+ kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ struct kernel_fpu vxstate;
+ u8 **dptr, *p, *q;
+ int d, z, z0;
+
+ dptr = (u8 **) ptrs;
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks - 2]; /* XOR parity */
+ q = dptr[disks - 1]; /* RS syndrome */
+
+ kernel_fpu_begin(&vxstate, KERNEL_VXR);
+ LOAD_CONST();
+
+ for (d = 0; d < bytes; d += $#*NSIZE) {
+ /* P/Q data pages */
+ LOAD_DATA(0,$#,&dptr[z0][d]);
+ COPY_VEC(8+$$,0+$$);
+ for (z = z0 - 1; z >= start; z--) {
+ MASK(16+$$,8+$$);
+ AND(16+$$,16+$$,25);
+ SHLBYTE(8+$$,8+$$);
+ XOR(8+$$,8+$$,16+$$);
+ LOAD_DATA(16,$#,&dptr[z][d]);
+ XOR(0+$$,0+$$,16+$$);
+ XOR(8+$$,8+$$,16+$$);
+ }
+ /* P/Q left side optimization */
+ for (z = start - 1; z >= 0; z--) {
+ MASK(16+$$,8+$$);
+ AND(16+$$,16+$$,25);
+ SHLBYTE(8+$$,8+$$);
+ XOR(8+$$,8+$$,16+$$);
+ }
+ LOAD_DATA(16,$#,&p[d]);
+ XOR(16+$$,16+$$,0+$$);
+ STORE_DATA(16,$#,&p[d]);
+ LOAD_DATA(16,$#,&q[d]);
+ XOR(16+$$,16+$$,8+$$);
+ STORE_DATA(16,$#,&q[d]);
+ }
+ kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static int raid6_s390vx$#_valid(void)
+{
+ return MACHINE_HAS_VX;
+}
+
+const struct raid6_calls raid6_s390vx$# = {
+ raid6_s390vx$#_gen_syndrome,
+ raid6_s390vx$#_xor_syndrome,
+ raid6_s390vx$#_valid,
+ "vx128x$#",
+ 1
+};
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 29090f3db677..2c7b60edea04 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64)
endif
ifeq ($(IS_X86),yes)
- OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
+ OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
gcc -c -x assembler - >&/dev/null && \
rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+ CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
+ gcc -c -x assembler - >&/dev/null && \
+ rm ./-.o && echo -DCONFIG_AS_AVX512=1)
else ifeq ($(HAS_NEON),yes)
OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 3bebbabdb510..b07f4d8e6b03 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -21,12 +21,13 @@
#define NDISKS 16 /* Including P and Q */
-const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
struct raid6_calls raid6_call;
char *dataptrs[NDISKS];
-char data[NDISKS][PAGE_SIZE];
-char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
+char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
static void makedata(int start, int stop)
{
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
+ * Instructions
+ */
+#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
+ * Instructions
+ */
+#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
+ * Extensions
+ */
#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
/* Should work well enough on modern CPUs for testing */
diff --git a/lib/random32.c b/lib/random32.c
index 510d1ce7d4d2..915982b304bb 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
u32 res;
res = prandom_u32_state(state);
- put_cpu_var(state);
+ put_cpu_var(net_rand_state);
return res;
}
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
struct rnd_state *state = &get_cpu_var(net_rand_state);
prandom_bytes_state(state, buf, bytes);
- put_cpu_var(state);
+ put_cpu_var(net_rand_state);
}
EXPORT_SYMBOL(prandom_bytes);
@@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare)
static void __init __prandom_start_seed_timer(void)
{
- set_timer_slack(&seed_timer, HZ);
seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
add_timer(&seed_timer);
}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 2c5de86460c5..08f8043cac61 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -46,12 +46,14 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->begin = jiffies;
if (time_is_before_jiffies(rs->begin + rs->interval)) {
- if (rs->missed)
- printk(KERN_WARNING "%s: %d callbacks suppressed\n",
- func, rs->missed);
+ if (rs->missed) {
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
+ rs->missed = 0;
+ }
+ }
rs->begin = jiffies;
rs->printed = 0;
- rs->missed = 0;
}
if (rs->burst && rs->burst > rs->printed) {
rs->printed++;
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e36de..eb8a19fee110 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
{
struct rb_node *parent = rb_parent(victim);
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+
/* Set the surrounding nodes to point to the replacement */
- __rb_change_child(victim, new, parent, root);
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
+ __rb_change_child(victim, new, parent, root);
+}
+EXPORT_SYMBOL(rb_replace_node);
+
+void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
+
+ /* Set the surrounding nodes to point to the replacement */
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Set the parent's pointer to the new node last after an RCU barrier
+ * so that the pointers onwards are seen to be set correctly when doing
+ * an RCU walk over the tree.
+ */
+ __rb_change_child_rcu(victim, new, parent, root);
}
-EXPORT_SYMBOL(rb_replace_node);
+EXPORT_SYMBOL(rb_replace_node_rcu);
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ffd7982..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 128UL
+#define BUCKET_LOCKS_PER_CPU 32UL
static u32 head_hashfn(struct rhashtable *ht,
const struct bucket_table *tbl,
@@ -70,21 +70,25 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
unsigned int nr_pcpus = num_possible_cpus();
#endif
- nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+ nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
/* Never allocate more than 0.5 locks per bucket */
size = min_t(unsigned int, size, tbl->size >> 1);
if (sizeof(spinlock_t) != 0) {
+ tbl->locks = NULL;
#ifdef CONFIG_NUMA
if (size * sizeof(spinlock_t) > PAGE_SIZE &&
gfp == GFP_KERNEL)
tbl->locks = vmalloc(size * sizeof(spinlock_t));
- else
#endif
- tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
- gfp);
+ if (gfp != GFP_KERNEL)
+ gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+ if (!tbl->locks)
+ tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+ gfp);
if (!tbl->locks)
return -ENOMEM;
for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@ static int rhashtable_expand(struct rhashtable *ht)
static int rhashtable_shrink(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
- unsigned int size;
+ unsigned int nelems = atomic_read(&ht->nelems);
+ unsigned int size = 0;
int err;
ASSERT_RHT_MUTEX(ht);
- size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+ if (nelems)
+ size = roundup_pow_of_two(nelems * 3 / 2);
if (size < ht->p.min_size)
size = ht->p.min_size;
@@ -372,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
schedule_work(&ht->run_work);
}
-static bool rhashtable_check_elasticity(struct rhashtable *ht,
- struct bucket_table *tbl,
- unsigned int hash)
-{
- unsigned int elasticity = ht->elasticity;
- struct rhash_head *head;
-
- rht_for_each(head, tbl, hash)
- if (!--elasticity)
- return true;
-
- return false;
-}
-
-int rhashtable_insert_rehash(struct rhashtable *ht,
- struct bucket_table *tbl)
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+ struct bucket_table *tbl)
{
struct bucket_table *old_tbl;
struct bucket_table *new_tbl;
@@ -433,61 +425,172 @@ fail:
return err;
}
-EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
- const void *key,
- struct rhash_head *obj,
- struct bucket_table *tbl)
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+ struct bucket_table *tbl, unsigned int hash,
+ const void *key, struct rhash_head *obj)
{
+ struct rhashtable_compare_arg arg = {
+ .ht = ht,
+ .key = key,
+ };
+ struct rhash_head __rcu **pprev;
struct rhash_head *head;
- unsigned int hash;
- int err;
+ int elasticity;
- tbl = rhashtable_last_table(ht, tbl);
- hash = head_hashfn(ht, tbl, obj);
- spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+ elasticity = ht->elasticity;
+ pprev = &tbl->buckets[hash];
+ rht_for_each(head, tbl, hash) {
+ struct rhlist_head *list;
+ struct rhlist_head *plist;
- err = -EEXIST;
- if (key && rhashtable_lookup_fast(ht, key, ht->p))
- goto exit;
+ elasticity--;
+ if (!key ||
+ (ht->p.obj_cmpfn ?
+ ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+ rhashtable_compare(&arg, rht_obj(ht, head))))
+ continue;
- err = -E2BIG;
- if (unlikely(rht_grow_above_max(ht, tbl)))
- goto exit;
+ if (!ht->rhlist)
+ return rht_obj(ht, head);
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ plist = container_of(head, struct rhlist_head, rhead);
+
+ RCU_INIT_POINTER(list->next, plist);
+ head = rht_dereference_bucket(head->next, tbl, hash);
+ RCU_INIT_POINTER(list->rhead.next, head);
+ rcu_assign_pointer(*pprev, obj);
- err = -EAGAIN;
- if (rhashtable_check_elasticity(ht, tbl, hash) ||
- rht_grow_above_100(ht, tbl))
- goto exit;
+ return NULL;
+ }
- err = 0;
+ if (elasticity <= 0)
+ return ERR_PTR(-EAGAIN);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+ struct bucket_table *tbl,
+ unsigned int hash,
+ struct rhash_head *obj,
+ void *data)
+{
+ struct bucket_table *new_tbl;
+ struct rhash_head *head;
+
+ if (!IS_ERR_OR_NULL(data))
+ return ERR_PTR(-EEXIST);
+
+ if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ new_tbl = rcu_dereference(tbl->future_tbl);
+ if (new_tbl)
+ return new_tbl;
+
+ if (PTR_ERR(data) != -ENOENT)
+ return ERR_CAST(data);
+
+ if (unlikely(rht_grow_above_max(ht, tbl)))
+ return ERR_PTR(-E2BIG);
+
+ if (unlikely(rht_grow_above_100(ht, tbl)))
+ return ERR_PTR(-EAGAIN);
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
RCU_INIT_POINTER(obj->next, head);
+ if (ht->rhlist) {
+ struct rhlist_head *list;
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ RCU_INIT_POINTER(list->next, NULL);
+ }
rcu_assign_pointer(tbl->buckets[hash], obj);
atomic_inc(&ht->nelems);
+ if (rht_grow_above_75(ht, tbl))
+ schedule_work(&ht->run_work);
-exit:
- spin_unlock(rht_bucket_lock(tbl, hash));
+ return NULL;
+}
- if (err == 0)
- return NULL;
- else if (err == -EAGAIN)
- return tbl;
- else
- return ERR_PTR(err);
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ struct bucket_table *new_tbl;
+ struct bucket_table *tbl;
+ unsigned int hash;
+ spinlock_t *lock;
+ void *data;
+
+ tbl = rcu_dereference(ht->tbl);
+
+ /* All insertions must grab the oldest table containing
+ * the hashed bucket that is yet to be rehashed.
+ */
+ for (;;) {
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ lock = rht_bucket_lock(tbl, hash);
+ spin_lock_bh(lock);
+
+ if (tbl->rehash <= hash)
+ break;
+
+ spin_unlock_bh(lock);
+ tbl = rcu_dereference(tbl->future_tbl);
+ }
+
+ data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ while (!IS_ERR_OR_NULL(new_tbl)) {
+ tbl = new_tbl;
+ hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+ spin_lock_nested(rht_bucket_lock(tbl, hash),
+ SINGLE_DEPTH_NESTING);
+
+ data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+ new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+ if (PTR_ERR(new_tbl) != -EEXIST)
+ data = ERR_CAST(new_tbl);
+
+ spin_unlock(rht_bucket_lock(tbl, hash));
+ }
+
+ spin_unlock_bh(lock);
+
+ if (PTR_ERR(data) == -EAGAIN)
+ data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+ -EAGAIN);
+
+ return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+ struct rhash_head *obj)
+{
+ void *data;
+
+ do {
+ rcu_read_lock();
+ data = rhashtable_try_insert(ht, key, obj);
+ rcu_read_unlock();
+ } while (PTR_ERR(data) == -EAGAIN);
+
+ return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
/**
- * rhashtable_walk_init - Initialise an iterator
+ * rhashtable_walk_enter - Initialise an iterator
* @ht: Table to walk over
* @iter: Hash table Iterator
- * @gfp: GFP flags for allocations
*
* This function prepares a hash table walk.
*
@@ -502,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
* This function may sleep so you must not call it from interrupt
* context or with spin locks held.
*
- * You must call rhashtable_walk_exit if this function returns
- * successfully.
+ * You must call rhashtable_walk_exit after this function returns.
*/
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
- gfp_t gfp)
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
iter->ht = ht;
iter->p = NULL;
iter->slot = 0;
iter->skip = 0;
- iter->walker = kmalloc(sizeof(*iter->walker), gfp);
- if (!iter->walker)
- return -ENOMEM;
-
spin_lock(&ht->lock);
- iter->walker->tbl =
+ iter->walker.tbl =
rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
- list_add(&iter->walker->list, &iter->walker->tbl->walkers);
+ list_add(&iter->walker.list, &iter->walker.tbl->walkers);
spin_unlock(&ht->lock);
-
- return 0;
}
-EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
/**
* rhashtable_walk_exit - Free an iterator
@@ -536,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
spin_lock(&iter->ht->lock);
- if (iter->walker->tbl)
- list_del(&iter->walker->list);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
spin_unlock(&iter->ht->lock);
- kfree(iter->walker);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
@@ -565,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
rcu_read_lock();
spin_lock(&ht->lock);
- if (iter->walker->tbl)
- list_del(&iter->walker->list);
+ if (iter->walker.tbl)
+ list_del(&iter->walker.list);
spin_unlock(&ht->lock);
- if (!iter->walker->tbl) {
- iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
+ if (!iter->walker.tbl) {
+ iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
return -EAGAIN;
}
@@ -592,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
*/
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
- struct bucket_table *tbl = iter->walker->tbl;
+ struct bucket_table *tbl = iter->walker.tbl;
+ struct rhlist_head *list = iter->list;
struct rhashtable *ht = iter->ht;
struct rhash_head *p = iter->p;
+ bool rhlist = ht->rhlist;
if (p) {
- p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+ if (!rhlist || !(list = rcu_dereference(list->next))) {
+ p = rcu_dereference(p->next);
+ list = container_of(p, struct rhlist_head, rhead);
+ }
goto next;
}
@@ -605,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
int skip = iter->skip;
rht_for_each_rcu(p, tbl, iter->slot) {
+ if (rhlist) {
+ list = container_of(p, struct rhlist_head,
+ rhead);
+ do {
+ if (!skip)
+ goto next;
+ skip--;
+ list = rcu_dereference(list->next);
+ } while (list);
+
+ continue;
+ }
if (!skip)
break;
skip--;
@@ -614,7 +725,8 @@ next:
if (!rht_is_a_nulls(p)) {
iter->skip++;
iter->p = p;
- return rht_obj(ht, p);
+ iter->list = list;
+ return rht_obj(ht, rhlist ? &list->rhead : p);
}
iter->skip = 0;
@@ -625,8 +737,8 @@ next:
/* Ensure we see any new tables. */
smp_rmb();
- iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
- if (iter->walker->tbl) {
+ iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+ if (iter->walker.tbl) {
iter->slot = 0;
iter->skip = 0;
return ERR_PTR(-EAGAIN);
@@ -646,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
__releases(RCU)
{
struct rhashtable *ht;
- struct bucket_table *tbl = iter->walker->tbl;
+ struct bucket_table *tbl = iter->walker.tbl;
if (!tbl)
goto out;
@@ -655,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
spin_lock(&ht->lock);
if (tbl->rehash < tbl->size)
- list_add(&iter->walker->list, &tbl->walkers);
+ list_add(&iter->walker.list, &tbl->walkers);
else
- iter->walker->tbl = NULL;
+ iter->walker.tbl = NULL;
spin_unlock(&ht->lock);
iter->p = NULL;
@@ -803,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
EXPORT_SYMBOL_GPL(rhashtable_init);
/**
+ * rhltable_init - initialize a new hash list table
+ * @hlt: hash list table to be initialized
+ * @params: configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+ int err;
+
+ /* No rhlist NULLs marking for now. */
+ if (params->nulls_base)
+ return -EINVAL;
+
+ err = rhashtable_init(&hlt->ht, params);
+ hlt->ht.rhlist = true;
+ return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+ void (*free_fn)(void *ptr, void *arg),
+ void *arg)
+{
+ struct rhlist_head *list;
+
+ if (!ht->rhlist) {
+ free_fn(rht_obj(ht, obj), arg);
+ return;
+ }
+
+ list = container_of(obj, struct rhlist_head, rhead);
+ do {
+ obj = &list->rhead;
+ list = rht_dereference(list->next, ht);
+ free_fn(rht_obj(ht, obj), arg);
+ } while (list);
+}
+
+/**
* rhashtable_free_and_destroy - free elements and destroy hash table
* @ht: the hash table to destroy
* @free_fn: callback to release resources of element
@@ -839,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
pos = next,
next = !rht_is_a_nulls(pos) ?
rht_dereference(pos->next, ht) : NULL)
- free_fn(rht_obj(ht, pos), arg);
+ rhashtable_free_one(ht, pos, free_fn, arg);
}
}
diff --git a/lib/sg_pool.c b/lib/sg_pool.c
new file mode 100644
index 000000000000..6dd30615a201
--- /dev/null
+++ b/lib/sg_pool.c
@@ -0,0 +1,172 @@
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools)
+#define SG_MEMPOOL_SIZE 2
+
+struct sg_pool {
+ size_t size;
+ char *name;
+ struct kmem_cache *slab;
+ mempool_t *pool;
+};
+
+#define SP(x) { .size = x, "sgpool-" __stringify(x) }
+#if (SG_CHUNK_SIZE < 32)
+#error SG_CHUNK_SIZE is too small (must be 32 or greater)
+#endif
+static struct sg_pool sg_pools[] = {
+ SP(8),
+ SP(16),
+#if (SG_CHUNK_SIZE > 32)
+ SP(32),
+#if (SG_CHUNK_SIZE > 64)
+ SP(64),
+#if (SG_CHUNK_SIZE > 128)
+ SP(128),
+#if (SG_CHUNK_SIZE > 256)
+#error SG_CHUNK_SIZE is too large (256 MAX)
+#endif
+#endif
+#endif
+#endif
+ SP(SG_CHUNK_SIZE)
+};
+#undef SP
+
+static inline unsigned int sg_pool_index(unsigned short nents)
+{
+ unsigned int index;
+
+ BUG_ON(nents > SG_CHUNK_SIZE);
+
+ if (nents <= 8)
+ index = 0;
+ else
+ index = get_count_order(nents) - 3;
+
+ return index;
+}
+
+static void sg_pool_free(struct scatterlist *sgl, unsigned int nents)
+{
+ struct sg_pool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ mempool_free(sgl, sgp->pool);
+}
+
+static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask)
+{
+ struct sg_pool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ return mempool_alloc(sgp->pool, gfp_mask);
+}
+
+/**
+ * sg_free_table_chained - Free a previously mapped sg table
+ * @table: The sg table header to use
+ * @first_chunk: was first_chunk not NULL in sg_alloc_table_chained?
+ *
+ * Description:
+ * Free an sg table previously allocated and setup with
+ * sg_alloc_table_chained().
+ *
+ **/
+void sg_free_table_chained(struct sg_table *table, bool first_chunk)
+{
+ if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE)
+ return;
+ __sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free);
+}
+EXPORT_SYMBOL_GPL(sg_free_table_chained);
+
+/**
+ * sg_alloc_table_chained - Allocate and chain SGLs in an sg table
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @first_chunk: first SGL
+ *
+ * Description:
+ * Allocate and chain SGLs in an sg table. If @nents@ is larger than
+ * SG_CHUNK_SIZE a chained sg table will be setup.
+ *
+ **/
+int sg_alloc_table_chained(struct sg_table *table, int nents,
+ struct scatterlist *first_chunk)
+{
+ int ret;
+
+ BUG_ON(!nents);
+
+ if (first_chunk) {
+ if (nents <= SG_CHUNK_SIZE) {
+ table->nents = table->orig_nents = nents;
+ sg_init_table(table->sgl, nents);
+ return 0;
+ }
+ }
+
+ ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE,
+ first_chunk, GFP_ATOMIC, sg_pool_alloc);
+ if (unlikely(ret))
+ sg_free_table_chained(table, (bool)first_chunk);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sg_alloc_table_chained);
+
+static __init int sg_pool_init(void)
+{
+ int i;
+
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ int size = sgp->size * sizeof(struct scatterlist);
+
+ sgp->slab = kmem_cache_create(sgp->name, size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!sgp->slab) {
+ printk(KERN_ERR "SG_POOL: can't init sg slab %s\n",
+ sgp->name);
+ goto cleanup_sdb;
+ }
+
+ sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
+ sgp->slab);
+ if (!sgp->pool) {
+ printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n",
+ sgp->name);
+ goto cleanup_sdb;
+ }
+ }
+
+ return 0;
+
+cleanup_sdb:
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ if (sgp->pool)
+ mempool_destroy(sgp->pool);
+ if (sgp->slab)
+ kmem_cache_destroy(sgp->slab);
+ }
+
+ return -ENOMEM;
+}
+
+static __exit void sg_pool_exit(void)
+{
+ int i;
+
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_pool *sgp = sg_pools + i;
+ mempool_destroy(sgp->pool);
+ kmem_cache_destroy(sgp->slab);
+ }
+}
+
+module_init(sg_pool_init);
+module_exit(sg_pool_exit);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 53ad6c0831ae..60f77f1d470a 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -242,6 +242,7 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
*/
alloc_flags &= ~GFP_ZONEMASK;
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+ alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
if (page)
prealloc = page_address(page);
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 5c88204b6f1f..ecaac2c0526f 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -10,6 +10,10 @@
#include <linux/export.h>
#include <linux/ctype.h>
#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_helpers.h>
@@ -534,3 +538,91 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
return p - dst;
}
EXPORT_SYMBOL(string_escape_mem);
+
+/*
+ * Return an allocated string that has been escaped of special characters
+ * and double quotes, making it safe to log in quotes.
+ */
+char *kstrdup_quotable(const char *src, gfp_t gfp)
+{
+ size_t slen, dlen;
+ char *dst;
+ const int flags = ESCAPE_HEX;
+ const char esc[] = "\f\n\r\t\v\a\e\\\"";
+
+ if (!src)
+ return NULL;
+ slen = strlen(src);
+
+ dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
+ dst = kmalloc(dlen + 1, gfp);
+ if (!dst)
+ return NULL;
+
+ WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
+ dst[dlen] = '\0';
+
+ return dst;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable);
+
+/*
+ * Returns allocated NULL-terminated string containing process
+ * command line, with inter-argument NULLs replaced with spaces,
+ * and other special characters escaped.
+ */
+char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
+{
+ char *buffer, *quoted;
+ int i, res;
+
+ buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY);
+ if (!buffer)
+ return NULL;
+
+ res = get_cmdline(task, buffer, PAGE_SIZE - 1);
+ buffer[res] = '\0';
+
+ /* Collapse trailing NULLs, leave res pointing to last non-NULL. */
+ while (--res >= 0 && buffer[res] == '\0')
+ ;
+
+ /* Replace inter-argument NULLs. */
+ for (i = 0; i <= res; i++)
+ if (buffer[i] == '\0')
+ buffer[i] = ' ';
+
+ /* Make sure result is printable. */
+ quoted = kstrdup_quotable(buffer, gfp);
+ kfree(buffer);
+ return quoted;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);
+
+/*
+ * Returns allocated NULL-terminated string containing pathname,
+ * with special characters escaped, able to be safely logged. If
+ * there is an error, the leading character will be "<".
+ */
+char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
+{
+ char *temp, *pathname;
+
+ if (!file)
+ return kstrdup("<unknown>", gfp);
+
+ /* We add 11 spaces for ' (deleted)' to be appended */
+ temp = kmalloc(PATH_MAX + 11, GFP_TEMPORARY);
+ if (!temp)
+ return kstrdup("<no_memory>", gfp);
+
+ pathname = file_path(file, temp, PATH_MAX + 11);
+ if (IS_ERR(pathname))
+ pathname = kstrdup("<too_long>", gfp);
+ else
+ pathname = kstrdup_quotable(pathname, gfp);
+
+ kfree(temp);
+ return pathname;
+}
+EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33840324138c..9c5fe8110413 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,5 +1,6 @@
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/kasan-checks.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -39,8 +40,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
unsigned long c, data;
/* Fall back to byte-at-a-time if we get a page fault */
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- break;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
*(unsigned long *)(dst+res) = c;
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
@@ -55,8 +56,7 @@ byte_at_a_time:
while (max) {
char c;
- if (unlikely(unsafe_get_user(c,src+res)))
- return -EFAULT;
+ unsafe_get_user(c,src+res, efault);
dst[res] = c;
if (!c)
return res;
@@ -75,6 +75,7 @@ byte_at_a_time:
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's an EFAULT.
*/
+efault:
return -EFAULT;
}
@@ -109,6 +110,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
unsigned long max = max_addr - src_addr;
long retval;
+ kasan_check_write(dst, count);
user_access_begin();
retval = do_strncpy_from_user(dst, src, count, max);
user_access_end();
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 2625943625d7..8e105ed4df12 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
src -= align;
max += align;
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)src, efault);
c |= aligned_byte_mask(align);
for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
if (unlikely(max <= sizeof(unsigned long)))
break;
max -= sizeof(unsigned long);
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
}
res -= align;
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's 0.
*/
+efault:
return 0;
}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 76f29ecba8f4..22e13a0e19d7 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -738,7 +738,7 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
phys_addr_t map, phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = phys_to_dma(dev, phys);
@@ -807,7 +807,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unmap_single(hwdev, dev_addr, size, dir);
}
@@ -877,7 +877,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_device);
*/
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
int i;
@@ -914,7 +914,7 @@ int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
{
- return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+ return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
}
EXPORT_SYMBOL(swiotlb_map_sg);
@@ -924,7 +924,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
*/
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
+ int nelems, enum dma_data_direction dir,
+ unsigned long attrs)
{
struct scatterlist *sg;
int i;
@@ -941,7 +942,7 @@ void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
{
- return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+ return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
}
EXPORT_SYMBOL(swiotlb_unmap_sg);
diff --git a/lib/syscall.c b/lib/syscall.c
index e30e03932480..63239e097b13 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -7,9 +7,19 @@ static int collect_syscall(struct task_struct *target, long *callno,
unsigned long args[6], unsigned int maxargs,
unsigned long *sp, unsigned long *pc)
{
- struct pt_regs *regs = task_pt_regs(target);
- if (unlikely(!regs))
+ struct pt_regs *regs;
+
+ if (!try_get_task_stack(target)) {
+ /* Task has no stack, so the task isn't in a syscall. */
+ *callno = -1;
+ return 0;
+ }
+
+ regs = task_pt_regs(target);
+ if (unlikely(!regs)) {
+ put_task_stack(target);
return -EAGAIN;
+ }
*sp = user_stack_pointer(regs);
*pc = instruction_pointer(regs);
@@ -18,6 +28,7 @@ static int collect_syscall(struct task_struct *target, long *callno,
if (*callno != -1L && maxargs > 0)
syscall_get_arguments(target, regs, 0, maxargs, args);
+ put_task_stack(target);
return 0;
}
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f45011a59d..94346b4d8984 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
skb->hash = SKB_HASH;
skb->queue_mapping = SKB_QUEUE_MAP;
skb->vlan_tci = SKB_VLAN_TCI;
+ skb->vlan_proto = htons(ETH_P_IP);
skb->dev = &dev;
skb->dev->ifindex = SKB_DEV_IFINDEX;
skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/test_hash.c b/lib/test_hash.c
new file mode 100644
index 000000000000..cac20c5fb304
--- /dev/null
+++ b/lib/test_hash.c
@@ -0,0 +1,256 @@
+/*
+ * Test cases for <linux/hash.h> and <linux/stringhash.h>
+ * This just verifies that various ways of computing a hash
+ * produce the same thing and, for cases where a k-bit hash
+ * value is requested, is of the requested size.
+ *
+ * We fill a buffer with a 255-byte null-terminated string,
+ * and use both full_name_hash() and hashlen_string() to hash the
+ * substrings from i to j, where 0 <= i < j < 256.
+ *
+ * The returned values are used to check that __hash_32() and
+ * __hash_32_generic() compute the same thing. Likewise hash_32()
+ * and hash_64().
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/hash.h>
+#include <linux/stringhash.h>
+#include <linux/printk.h>
+
+/* 32-bit XORSHIFT generator. Seed must not be zero. */
+static u32 __init __attribute_const__
+xorshift(u32 seed)
+{
+ seed ^= seed << 13;
+ seed ^= seed >> 17;
+ seed ^= seed << 5;
+ return seed;
+}
+
+/* Given a non-zero x, returns a non-zero byte. */
+static u8 __init __attribute_const__
+mod255(u32 x)
+{
+ x = (x & 0xffff) + (x >> 16); /* 1 <= x <= 0x1fffe */
+ x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x2fd */
+ x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0x100 */
+ x = (x & 0xff) + (x >> 8); /* 1 <= x <= 0xff */
+ return x;
+}
+
+/* Fill the buffer with non-zero bytes. */
+static void __init
+fill_buf(char *buf, size_t len, u32 seed)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ seed = xorshift(seed);
+ buf[i] = mod255(seed);
+ }
+}
+
+/*
+ * Test the various integer hash functions. h64 (or its low-order bits)
+ * is the integer to hash. hash_or accumulates the OR of the hash values,
+ * which are later checked to see that they cover all the requested bits.
+ *
+ * Because these functions (as opposed to the string hashes) are all
+ * inline, the code being tested is actually in the module, and you can
+ * recompile and re-test the module without rebooting.
+ */
+static bool __init
+test_int_hash(unsigned long long h64, u32 hash_or[2][33])
+{
+ int k;
+ u32 h0 = (u32)h64, h1, h2;
+
+ /* Test __hash32 */
+ hash_or[0][0] |= h1 = __hash_32(h0);
+#ifdef HAVE_ARCH__HASH_32
+ hash_or[1][0] |= h2 = __hash_32_generic(h0);
+#if HAVE_ARCH__HASH_32 == 1
+ if (h1 != h2) {
+ pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
+ h0, h1, h2);
+ return false;
+ }
+#endif
+#endif
+
+ /* Test k = 1..32 bits */
+ for (k = 1; k <= 32; k++) {
+ u32 const m = ((u32)2 << (k-1)) - 1; /* Low k bits set */
+
+ /* Test hash_32 */
+ hash_or[0][k] |= h1 = hash_32(h0, k);
+ if (h1 > m) {
+ pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
+ return false;
+ }
+#ifdef HAVE_ARCH_HASH_32
+ h2 = hash_32_generic(h0, k);
+#if HAVE_ARCH_HASH_32 == 1
+ if (h1 != h2) {
+ pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
+ " = %#x", h0, k, h1, h2);
+ return false;
+ }
+#else
+ if (h2 > m) {
+ pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
+ h0, k, h1, m);
+ return false;
+ }
+#endif
+#endif
+ /* Test hash_64 */
+ hash_or[1][k] |= h1 = hash_64(h64, k);
+ if (h1 > m) {
+ pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
+ return false;
+ }
+#ifdef HAVE_ARCH_HASH_64
+ h2 = hash_64_generic(h64, k);
+#if HAVE_ARCH_HASH_64 == 1
+ if (h1 != h2) {
+ pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
+ "= %#x", h64, k, h1, h2);
+ return false;
+ }
+#else
+ if (h2 > m) {
+ pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
+ h64, k, h1, m);
+ return false;
+ }
+#endif
+#endif
+ }
+
+ (void)h2; /* Suppress unused variable warning */
+ return true;
+}
+
+#define SIZE 256 /* Run time is cubic in SIZE */
+
+static int __init
+test_hash_init(void)
+{
+ char buf[SIZE+1];
+ u32 string_or = 0, hash_or[2][33] = { { 0, } };
+ unsigned tests = 0;
+ unsigned long long h64 = 0;
+ int i, j;
+
+ fill_buf(buf, SIZE, 1);
+
+ /* Test every possible non-empty substring in the buffer. */
+ for (j = SIZE; j > 0; --j) {
+ buf[j] = '\0';
+
+ for (i = 0; i <= j; i++) {
+ u64 hashlen = hashlen_string(buf+i, buf+i);
+ u32 h0 = full_name_hash(buf+i, buf+i, j-i);
+
+ /* Check that hashlen_string gets the length right */
+ if (hashlen_len(hashlen) != j-i) {
+ pr_err("hashlen_string(%d..%d) returned length"
+ " %u, expected %d",
+ i, j, hashlen_len(hashlen), j-i);
+ return -EINVAL;
+ }
+ /* Check that the hashes match */
+ if (hashlen_hash(hashlen) != h0) {
+ pr_err("hashlen_string(%d..%d) = %08x != "
+ "full_name_hash() = %08x",
+ i, j, hashlen_hash(hashlen), h0);
+ return -EINVAL;
+ }
+
+ string_or |= h0;
+ h64 = h64 << 32 | h0; /* For use with hash_64 */
+ if (!test_int_hash(h64, hash_or))
+ return -EINVAL;
+ tests++;
+ } /* i */
+ } /* j */
+
+ /* The OR of all the hash values should cover all the bits */
+ if (~string_or) {
+ pr_err("OR of all string hash results = %#x != %#x",
+ string_or, -1u);
+ return -EINVAL;
+ }
+ if (~hash_or[0][0]) {
+ pr_err("OR of all __hash_32 results = %#x != %#x",
+ hash_or[0][0], -1u);
+ return -EINVAL;
+ }
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1 /* Test is pointless if results match */
+ if (~hash_or[1][0]) {
+ pr_err("OR of all __hash_32_generic results = %#x != %#x",
+ hash_or[1][0], -1u);
+ return -EINVAL;
+ }
+#endif
+#endif
+
+ /* Likewise for all the i-bit hash values */
+ for (i = 1; i <= 32; i++) {
+ u32 const m = ((u32)2 << (i-1)) - 1; /* Low i bits set */
+
+ if (hash_or[0][i] != m) {
+ pr_err("OR of all hash_32(%d) results = %#x "
+ "(%#x expected)", i, hash_or[0][i], m);
+ return -EINVAL;
+ }
+ if (hash_or[1][i] != m) {
+ pr_err("OR of all hash_64(%d) results = %#x "
+ "(%#x expected)", i, hash_or[1][i], m);
+ return -EINVAL;
+ }
+ }
+
+ /* Issue notices about skipped tests. */
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1
+ pr_info("__hash_32() is arch-specific; not compared to generic.");
+#endif
+#else
+ pr_info("__hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_32
+#if HAVE_ARCH_HASH_32 != 1
+ pr_info("hash_32() is arch-specific; not compared to generic.");
+#endif
+#else
+ pr_info("hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_64
+#if HAVE_ARCH_HASH_64 != 1
+ pr_info("hash_64() is arch-specific; not compared to generic.");
+#endif
+#else
+ pr_info("hash_64() has no arch implementation to test.");
+#endif
+
+ pr_notice("%u tests passed.", tests);
+
+ return 0;
+}
+
+static void __exit test_hash_exit(void)
+{
+}
+
+module_init(test_hash_init); /* Does everything */
+module_exit(test_hash_exit); /* Does nothing */
+
+MODULE_LICENSE("GPL");
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 82169fbf2453..5e51872b3fc1 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -12,9 +12,12 @@
#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/uaccess.h>
#include <linux/module.h>
static noinline void __init kmalloc_oob_right(void)
@@ -344,6 +347,70 @@ static noinline void __init kasan_stack_oob(void)
*(volatile char *)p;
}
+static noinline void __init ksize_unpoisons_memory(void)
+{
+ char *ptr;
+ size_t size = 123, real_size = size;
+
+ pr_info("ksize() unpoisons the whole allocated chunk\n");
+ ptr = kmalloc(size, GFP_KERNEL);
+ if (!ptr) {
+ pr_err("Allocation failed\n");
+ return;
+ }
+ real_size = ksize(ptr);
+ /* This access doesn't trigger an error. */
+ ptr[size] = 'x';
+ /* This one does. */
+ ptr[real_size] = 'y';
+ kfree(ptr);
+}
+
+static noinline void __init copy_user_test(void)
+{
+ char *kmem;
+ char __user *usermem;
+ size_t size = 10;
+ int unused;
+
+ kmem = kmalloc(size, GFP_KERNEL);
+ if (!kmem)
+ return;
+
+ usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
+ if (IS_ERR(usermem)) {
+ pr_err("Failed to allocate user memory\n");
+ kfree(kmem);
+ return;
+ }
+
+ pr_info("out-of-bounds in copy_from_user()\n");
+ unused = copy_from_user(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in copy_to_user()\n");
+ unused = copy_to_user(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in __copy_from_user()\n");
+ unused = __copy_from_user(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in __copy_to_user()\n");
+ unused = __copy_to_user(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
+ unused = __copy_from_user_inatomic(kmem, usermem, size + 1);
+
+ pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
+ unused = __copy_to_user_inatomic(usermem, kmem, size + 1);
+
+ pr_info("out-of-bounds in strncpy_from_user()\n");
+ unused = strncpy_from_user(kmem, usermem, size + 1);
+
+ vm_munmap((unsigned long)usermem, PAGE_SIZE);
+ kfree(kmem);
+}
+
static int __init kmalloc_tests_init(void)
{
kmalloc_oob_right();
@@ -367,6 +434,8 @@ static int __init kmalloc_tests_init(void)
kmem_cache_oob();
kasan_stack_oob();
kasan_global_oob();
+ ksize_unpoisons_memory();
+ copy_user_test();
return -EAGAIN;
}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5e74bd..64e899b63337 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
static int max_size = 0;
module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
static bool shrinking = false;
module_param(shrinking, bool, 0);
diff --git a/lib/test_uuid.c b/lib/test_uuid.c
new file mode 100644
index 000000000000..547d3127a3cf
--- /dev/null
+++ b/lib/test_uuid.c
@@ -0,0 +1,133 @@
+/*
+ * Test cases for lib/uuid.c module.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/uuid.h>
+
+struct test_uuid_data {
+ const char *uuid;
+ uuid_le le;
+ uuid_be be;
+};
+
+static const struct test_uuid_data test_uuid_test_data[] = {
+ {
+ .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576",
+ .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+ .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76),
+ },
+ {
+ .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b",
+ .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+ .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b),
+ },
+ {
+ .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84",
+ .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+ .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84),
+ },
+};
+
+static const char * const test_uuid_wrong_data[] = {
+ "c33f4995-3701-450e-9fbf206a2e98e576 ", /* no hyphen(s) */
+ "64b4371c-77c1-48f9-8221-29f054XX023b", /* invalid character(s) */
+ "0cb4ddff-a545-4401-9d06-688af53e", /* not enough data */
+};
+
+static unsigned total_tests __initdata;
+static unsigned failed_tests __initdata;
+
+static void __init test_uuid_failed(const char *prefix, bool wrong, bool be,
+ const char *data, const char *actual)
+{
+ pr_err("%s test #%u %s %s data: '%s'\n",
+ prefix,
+ total_tests,
+ wrong ? "passed on wrong" : "failed on",
+ be ? "BE" : "LE",
+ data);
+ if (actual && *actual)
+ pr_err("%s test #%u actual data: '%s'\n",
+ prefix,
+ total_tests,
+ actual);
+ failed_tests++;
+}
+
+static void __init test_uuid_test(const struct test_uuid_data *data)
+{
+ uuid_le le;
+ uuid_be be;
+ char buf[48];
+
+ /* LE */
+ total_tests++;
+ if (uuid_le_to_bin(data->uuid, &le))
+ test_uuid_failed("conversion", false, false, data->uuid, NULL);
+
+ total_tests++;
+ if (uuid_le_cmp(data->le, le)) {
+ sprintf(buf, "%pUl", &le);
+ test_uuid_failed("cmp", false, false, data->uuid, buf);
+ }
+
+ /* BE */
+ total_tests++;
+ if (uuid_be_to_bin(data->uuid, &be))
+ test_uuid_failed("conversion", false, true, data->uuid, NULL);
+
+ total_tests++;
+ if (uuid_be_cmp(data->be, be)) {
+ sprintf(buf, "%pUb", &be);
+ test_uuid_failed("cmp", false, true, data->uuid, buf);
+ }
+}
+
+static void __init test_uuid_wrong(const char *data)
+{
+ uuid_le le;
+ uuid_be be;
+
+ /* LE */
+ total_tests++;
+ if (!uuid_le_to_bin(data, &le))
+ test_uuid_failed("negative", true, false, data, NULL);
+
+ /* BE */
+ total_tests++;
+ if (!uuid_be_to_bin(data, &be))
+ test_uuid_failed("negative", true, true, data, NULL);
+}
+
+static int __init test_uuid_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_uuid_test_data); i++)
+ test_uuid_test(&test_uuid_test_data[i]);
+
+ for (i = 0; i < ARRAY_SIZE(test_uuid_wrong_data); i++)
+ test_uuid_wrong(test_uuid_wrong_data[i]);
+
+ if (failed_tests == 0)
+ pr_info("all %u tests passed\n", total_tests);
+ else
+ pr_err("failed %u out of %u tests\n", failed_tests, total_tests);
+
+ return failed_tests ? -EINVAL : 0;
+}
+module_init(test_uuid_init);
+
+static void __exit test_uuid_exit(void)
+{
+ /* do nothing */
+}
+module_exit(test_uuid_exit);
+
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 8799ae5e2e42..fb0409df1bcf 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -308,7 +308,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data,
return;
ubsan_prologue(&data->location, &flags);
- pr_err("%s address %pk with insufficient space\n",
+ pr_err("%s address %p with insufficient space\n",
type_check_kinds[data->type_check_kind],
(void *) ptr);
pr_err("for an object of type %s\n", data->type->type_name);
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index f0b323abb4c6..ae8d2491133c 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -56,7 +56,7 @@ ucs2_utf8size(const ucs2_char_t *src)
unsigned long i;
unsigned long j = 0;
- for (i = 0; i < ucs2_strlen(src); i++) {
+ for (i = 0; src[i]; i++) {
u16 c = src[i];
if (c >= 0x800)
diff --git a/lib/usercopy.c b/lib/usercopy.c
deleted file mode 100644
index 4f5b1ddbcd25..000000000000
--- a/lib/usercopy.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <linux/export.h>
-#include <linux/bug.h>
-#include <linux/uaccess.h>
-
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/lib/uuid.c b/lib/uuid.c
index 398821e4dce1..37687af77ff8 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -1,7 +1,7 @@
/*
* Unified UUID/GUID definition
*
- * Copyright (C) 2009, Intel Corp.
+ * Copyright (C) 2009, 2016 Intel Corp.
* Huang Ying <ying.huang@intel.com>
*
* This program is free software; you can redistribute it and/or
@@ -12,17 +12,40 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
#include <linux/export.h>
#include <linux/uuid.h>
#include <linux/random.h>
+const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+EXPORT_SYMBOL(uuid_le_index);
+const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+EXPORT_SYMBOL(uuid_be_index);
+
+/***************************************************************
+ * Random UUID interface
+ *
+ * Used here for a Boot ID, but can be useful for other kernel
+ * drivers.
+ ***************************************************************/
+
+/*
+ * Generate random UUID
+ */
+void generate_random_uuid(unsigned char uuid[16])
+{
+ get_random_bytes(uuid, 16);
+ /* Set UUID version to 4 --- truly random generation */
+ uuid[6] = (uuid[6] & 0x0F) | 0x40;
+ /* Set the UUID variant to DCE */
+ uuid[8] = (uuid[8] & 0x3F) | 0x80;
+}
+EXPORT_SYMBOL(generate_random_uuid);
+
static void __uuid_gen_common(__u8 b[16])
{
prandom_bytes(b, 16);
@@ -45,3 +68,61 @@ void uuid_be_gen(uuid_be *bu)
bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
}
EXPORT_SYMBOL_GPL(uuid_be_gen);
+
+/**
+ * uuid_is_valid - checks if UUID string valid
+ * @uuid: UUID string to check
+ *
+ * Description:
+ * It checks if the UUID string is following the format:
+ * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ * where x is a hex digit.
+ *
+ * Return: true if input is valid UUID string.
+ */
+bool uuid_is_valid(const char *uuid)
+{
+ unsigned int i;
+
+ for (i = 0; i < UUID_STRING_LEN; i++) {
+ if (i == 8 || i == 13 || i == 18 || i == 23) {
+ if (uuid[i] != '-')
+ return false;
+ } else if (!isxdigit(uuid[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(uuid_is_valid);
+
+static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+{
+ static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
+ unsigned int i;
+
+ if (!uuid_is_valid(uuid))
+ return -EINVAL;
+
+ for (i = 0; i < 16; i++) {
+ int hi = hex_to_bin(uuid[si[i] + 0]);
+ int lo = hex_to_bin(uuid[si[i] + 1]);
+
+ b[ei[i]] = (hi << 4) | lo;
+ }
+
+ return 0;
+}
+
+int uuid_le_to_bin(const char *uuid, uuid_le *u)
+{
+ return __uuid_to_bin(uuid, u->b, uuid_le_index);
+}
+EXPORT_SYMBOL(uuid_le_to_bin);
+
+int uuid_be_to_bin(const char *uuid, uuid_be *u)
+{
+ return __uuid_to_bin(uuid, u->b, uuid_be_index);
+}
+EXPORT_SYMBOL(uuid_be_to_bin);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index ccb664b54280..0967771d8f7f 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -30,6 +30,7 @@
#include <linux/ioport.h>
#include <linux/dcache.h>
#include <linux/cred.h>
+#include <linux/uuid.h>
#include <net/addrconf.h>
#ifdef CONFIG_BLOCK
#include <linux/blkdev.h>
@@ -1304,19 +1305,17 @@ static noinline_for_stack
char *uuid_string(char *buf, char *end, const u8 *addr,
struct printf_spec spec, const char *fmt)
{
- char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
+ char uuid[UUID_STRING_LEN + 1];
char *p = uuid;
int i;
- static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
- static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
- const u8 *index = be;
+ const u8 *index = uuid_be_index;
bool uc = false;
switch (*(++fmt)) {
case 'L':
uc = true; /* fall-through */
case 'l':
- index = le;
+ index = uuid_le_index;
break;
case 'B':
uc = true;
@@ -1324,7 +1323,10 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
}
for (i = 0; i < 16; i++) {
- p = hex_byte_pack(p, addr[index[i]]);
+ if (uc)
+ p = hex_byte_pack_upper(p, addr[index[i]]);
+ else
+ p = hex_byte_pack(p, addr[index[i]]);
switch (i) {
case 3:
case 5:
@@ -1337,13 +1339,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
*p = 0;
- if (uc) {
- p = uuid;
- do {
- *p = toupper(*p);
- } while (*(++p));
- }
-
return string(buf, end, uuid, spec);
}
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
+/**
+ * lib/minmax.c: windowed min/max tracker
+ *
+ * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
+ * value of a data stream over some fixed time interval. (E.g.,
+ * the minimum RTT over the past five minutes.) It uses constant
+ * space and constant time per update yet almost always delivers
+ * the same minimum as an implementation that has to keep all the
+ * data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of
+ * the n'th best >= n-1'th best. It also makes sure that the three
+ * values are widely separated in the time window since that bounds
+ * the worse case error when that data is monotonically increasing
+ * over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because
+ * it has no value - the new min is <= everything else in the window
+ * by definition and it's the most recent. So we restart fresh on
+ * every new min and overwrites 2nd & 3rd choices. The same property
+ * holds for 2nd & 3rd best.
+ */
+#include <linux/module.h>
+#include <linux/win_minmax.h>
+
+/* As time advances, update the 1st, 2nd, and 3rd choices. */
+static u32 minmax_subwin_update(struct minmax *m, u32 win,
+ const struct minmax_sample *val)
+{
+ u32 dt = val->t - m->s[0].t;
+
+ if (unlikely(dt > win)) {
+ /*
+ * Passed entire window without a new val so make 2nd
+ * choice the new val & 3rd choice the new 2nd choice.
+ * we may have to iterate this since our 2nd choice
+ * may also be outside the window (we checked on entry
+ * that the third choice was in the window).
+ */
+ m->s[0] = m->s[1];
+ m->s[1] = m->s[2];
+ m->s[2] = *val;
+ if (unlikely(val->t - m->s[0].t > win)) {
+ m->s[0] = m->s[1];
+ m->s[1] = m->s[2];
+ m->s[2] = *val;
+ }
+ } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
+ /*
+ * We've passed a quarter of the window without a new val
+ * so take a 2nd choice from the 2nd quarter of the window.
+ */
+ m->s[2] = m->s[1] = *val;
+ } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
+ /*
+ * We've passed half the window without finding a new val
+ * so take a 3rd choice from the last half of the window
+ */
+ m->s[2] = *val;
+ }
+ return m->s[0].v;
+}
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
+u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+ struct minmax_sample val = { .t = t, .v = meas };
+
+ if (unlikely(val.v >= m->s[0].v) || /* found new max? */
+ unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
+ return minmax_reset(m, t, meas); /* forget earlier samples */
+
+ if (unlikely(val.v >= m->s[1].v))
+ m->s[2] = m->s[1] = val;
+ else if (unlikely(val.v >= m->s[2].v))
+ m->s[2] = val;
+
+ return minmax_subwin_update(m, win, &val);
+}
+EXPORT_SYMBOL(minmax_running_max);
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
+u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+ struct minmax_sample val = { .t = t, .v = meas };
+
+ if (unlikely(val.v <= m->s[0].v) || /* found new min? */
+ unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */
+ return minmax_reset(m, t, meas); /* forget earlier samples */
+
+ if (unlikely(val.v <= m->s[1].v))
+ m->s[2] = m->s[1] = val;
+ else if (unlikely(val.v <= m->s[2].v))
+ m->s[2] = val;
+
+ return minmax_subwin_update(m, win, &val);
+}