Diffstat (limited to 'lib')
51 files changed, 3519 insertions, 811 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d4df5b24d75e..0d9e81779e37 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -17,6 +17,23 @@ config PRINTK_TIME The behavior is also controlled by the kernel command line parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst +config PRINTK_CALLER + bool "Show caller information on printks" + depends on PRINTK + help + Selecting this option causes printk() to add a caller "thread id" (if + in task context) or a caller "processor id" (if not in task context) + to every message. + + This option is intended for environments where multiple threads + concurrently call printk() for many times, for it is difficult to + interpret without knowing where these lines (or sometimes individual + line which was divided into multiple lines due to race) came from. + + Since toggling after boot makes the code racy, currently there is + no option to enable/disable at the kernel command line parameter or + sysfs interface. + config CONSOLE_LOGLEVEL_DEFAULT int "Default console loglevel (1-15)" range 1 15 @@ -179,6 +196,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" depends on DEBUG_INFO + depends on $(cc-option,-gsplit-dwarf) help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -194,6 +212,7 @@ config DEBUG_INFO_SPLIT config DEBUG_INFO_DWARF4 bool "Generate dwarf4 debuginfo" depends on DEBUG_INFO + depends on $(cc-option,-gdwarf-4) help Generate dwarf4 debug info. This requires recent versions of gcc and gdb. It makes the debug information larger. @@ -222,7 +241,6 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) @@ -266,23 +284,6 @@ config UNUSED_SYMBOLS you really need it, and what the merge plan to the mainline kernel for your module is. -config PAGE_OWNER - bool "Track page owner" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT - select DEBUG_FS - select STACKTRACE - select STACKDEPOT - select PAGE_EXTENSION - help - This keeps track of what call chain is the owner of a page, may - help to find bare alloc_page(s) leaks. Even if you include this - feature on your build, it is disabled in default. You should pass - "page_owner=on" to boot parameter in order to enable it. Eats - a fair amount of memory if enabled. See tools/vm/page_owner_sort.c - for user-space helper. - - If unsure, say N. - config DEBUG_FS bool "Debug Filesystem" help @@ -1655,42 +1656,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config DMA_API_DEBUG - bool "Enable debugging of DMA-API usage" - select NEED_DMA_MAP_STATE - help - Enable this option to debug the use of the DMA API by device drivers. - With this option you will be able to detect common bugs in device - drivers like double-freeing of DMA mappings or freeing mappings that - were never allocated. - - This also attempts to catch cases where a page owned by DMA is - accessed by the cpu in a way that could cause data corruption. For - example, this enables cow_user_page() to check that the source page is - not undergoing DMA. - - This option causes a performance degradation. Use only if you want to - debug device drivers and dma interactions. - - If unsure, say N. 
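The DMA_API_DEBUG help text just above (the entry is being removed from lib/Kconfig.debug in this diff) describes double-frees and mismatched unmaps. A minimal, hypothetical driver fragment of the kind that option flags at runtime, assuming a valid struct device *dev — none of this code is part of the diff itself:

#include <linux/dma-mapping.h>

/*
 * Illustrative only: with DMA-API debugging enabled, both the size
 * mismatch on the first unmap and the second (double) unmap below are
 * reported at runtime.
 */
static int example_dma_misuse(struct device *dev, void *buf)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, 4096, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;

	dma_unmap_single(dev, handle, 2048, DMA_TO_DEVICE);	/* wrong size */
	dma_unmap_single(dev, handle, 4096, DMA_TO_DEVICE);	/* double unmap */
	return 0;
}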
- -config DMA_API_DEBUG_SG - bool "Debug DMA scatter-gather usage" - default y - depends on DMA_API_DEBUG - help - Perform extra checking that callers of dma_map_sg() have respected the - appropriate segment length/boundary limits for the given device when - preparing DMA scatterlists. - - This is particularly likely to have been overlooked in cases where the - dma_map_sg() API is used for general bulk mapping of pages rather than - preparing literal scatter-gather descriptors, where there is a risk of - unexpected behaviour from DMA API implementations if the scatterlist - is technically out-of-spec. - - If unsure, say N. - menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" def_bool y @@ -1700,7 +1665,6 @@ if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_FS - depends on BLOCK help This module enables testing of the different dumping mechanisms by inducing system failures at predefined crash points. @@ -1876,6 +1840,19 @@ config TEST_LKM If unsure, say N. +config TEST_VMALLOC + tristate "Test module for stress/performance analysis of vmalloc allocator" + default n + depends on MMU + depends on m + help + This builds the "test_vmalloc" module that should be used for + stress and performance analysis. So, any new change for vmalloc + subsystem can be evaluated from performance and stability point + of view. + + If unsure, say N. + config TEST_USER_COPY tristate "Test user/kernel boundary protections" depends on m @@ -1991,6 +1968,28 @@ config TEST_MEMCAT_P If unsure, say N. +config TEST_LIVEPATCH + tristate "Test livepatching" + default n + depends on DYNAMIC_DEBUG + depends on LIVEPATCH + depends on m + help + Test kernel livepatching features for correctness. The tests will + load test modules that will be livepatched in various scenarios. + + To run all the livepatching tests: + + make -C tools/testing/selftests TARGETS=livepatch run_tests + + Alternatively, individual tests may be invoked: + + tools/testing/selftests/livepatch/test-callbacks.sh + tools/testing/selftests/livepatch/test-livepatch.sh + tools/testing/selftests/livepatch/test-shadow-vars.sh + + If unsure, say N. + config TEST_OBJAGG tristate "Perform selftest on object aggreration manager" default n @@ -1999,6 +1998,15 @@ config TEST_OBJAGG Enable this option to test object aggregation manager on boot (or module load). + +config TEST_STACKINIT + tristate "Test level of stack variable initialization" + help + Test if the kernel is zero-initializing stack variables and + padding. Coverage is controlled by compiler flags, + CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, + or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. + If unsure, say N. endif # RUNTIME_TESTING_MENU diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index d8c474b6691e..9950b660e62d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -78,16 +78,6 @@ config KASAN_SW_TAGS endchoice -config KASAN_EXTRA - bool "KASAN: extra checks" - depends on KASAN_GENERIC && DEBUG_KERNEL && !COMPILE_TEST - help - This enables further checks in generic KASAN, for now it only - includes the address-use-after-scope check that can lead to - excessive kernel stack usage, frame size warnings and longer - compile time. 
- See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 - choice prompt "Instrumentation type" depends on KASAN @@ -113,6 +103,28 @@ config KASAN_INLINE endchoice +config KASAN_STACK_ENABLE + bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST + default !(CLANG_VERSION < 90000) + depends on KASAN + help + The LLVM stack address sanitizer has a know problem that + causes excessive stack usage in a lot of functions, see + https://bugs.llvm.org/show_bug.cgi?id=38809 + Disabling asan-stack makes it safe to run kernels build + with clang-8 with KASAN enabled, though it loses some of + the functionality. + This feature is always disabled when compile-testing with clang-8 + or earlier to avoid cluttering the output in stack overflow + warnings, but clang-8 users can still enable it for builds without + CONFIG_COMPILE_TEST. On gcc and later clang versions it is + assumed to always be safe to use and enabled by default. + +config KASAN_STACK + int + default 1 if KASAN_STACK_ENABLE || CC_IS_GCC + default 0 + config KASAN_S390_4_LEVEL_PAGING bool "KASan: use 4-level paging" depends on KASAN && S390 diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 98fa559ebd80..a2ae4a8e4fa6 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -27,15 +27,19 @@ config UBSAN_SANITIZE_ALL Enabling this option will get kernel image size increased significantly. -config UBSAN_ALIGNMENT - bool "Enable checking of pointers alignment" +config UBSAN_NO_ALIGNMENT + bool "Disable checking of pointers alignment" depends on UBSAN - default y if !HAVE_EFFICIENT_UNALIGNED_ACCESS + default y if HAVE_EFFICIENT_UNALIGNED_ACCESS help - This option enables detection of unaligned memory accesses. - Enabling this option on architectures that support unaligned + This option disables the check of unaligned memory accesses. + This option should be used when building allmodconfig. + Disabling this option on architectures that support unaligned accesses may produce a lot of false positives. 
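As a concrete illustration of the UBSAN_NO_ALIGNMENT help text above, this is the kind of access the alignment sanitizer reports. It is harmless on architectures that handle unaligned loads in hardware, which is why the check now defaults to off there. The fragment is hypothetical and not taken from this diff:

#include <linux/types.h>

/* Illustrative only: a load through a pointer that is less aligned than
 * its type requires is flagged by -fsanitize=alignment. */
static u32 example_misaligned_load(u8 *buf)
{
	u32 *p = (u32 *)(buf + 1);	/* only 1-byte aligned */

	return *p;			/* reported by the alignment check */
}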
+config UBSAN_ALIGNMENT + def_bool !UBSAN_NO_ALIGNMENT + config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m && UBSAN diff --git a/lib/Makefile b/lib/Makefile index e1b59da71418..4e066120a0d6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -35,10 +35,11 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o bucket_locks.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o \ + generic-radix-tree.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o @@ -60,6 +61,7 @@ UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o @@ -76,6 +78,9 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o + +obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/assoc_array.c b/lib/assoc_array.c index c6659cb37033..edc3c14af41d 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -768,9 +768,11 @@ all_leaves_cluster_together: new_s0->index_key[i] = ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); - blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); - pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); - new_s0->index_key[keylen - 1] &= ~blank; + if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) { + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } /* This now reduces to a node splitting exercise for which we'll need * to regenerate the disparity table. 
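A stand-alone sketch (userspace-style, not kernel code from this diff) of the corner case the assoc_array.c hunk above guards against: when the level is an exact multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE the shift amount is zero, so "blank" is all ones and "&= ~blank" clears the entire final index_key word. The fix simply skips the masking when there is nothing to blank.

#include <limits.h>

static unsigned long old_blanking(unsigned long last_chunk, int shift)
{
	unsigned long blank = ULONG_MAX << shift;

	/* with shift == 0, blank == ULONG_MAX and the whole chunk is lost */
	return last_chunk & ~blank;
}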
@@ -1115,6 +1117,7 @@ struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, index_key)) goto found_leaf; } + /* fall through */ case assoc_array_walk_tree_empty: case assoc_array_walk_found_wrong_shortcut: default: diff --git a/lib/bsearch.c b/lib/bsearch.c index 18b445b010c3..82512fe7b33c 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -11,6 +11,7 @@ #include <linux/export.h> #include <linux/bsearch.h> +#include <linux/kprobes.h> /* * bsearch - binary search an array of elements @@ -53,3 +54,4 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size, return NULL; } EXPORT_SYMBOL(bsearch); +NOKPROBE_SYMBOL(bsearch); diff --git a/lib/cpumask.c b/lib/cpumask.c index 8d666ab84b5c..0cb672eb107c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,6 +5,7 @@ #include <linux/cpumask.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/numa.h> /** * cpumask_next - get the next cpu in a cpumask @@ -164,6 +165,9 @@ EXPORT_SYMBOL(zalloc_cpumask_var); void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); + if (!*mask) + panic("%s: Failed to allocate %u bytes\n", __func__, + cpumask_size()); } /** @@ -206,7 +210,7 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (node == -1) { + if (node == NUMA_NO_NODE) { for_each_cpu(cpu, cpu_online_mask) if (i-- == 0) return cpu; diff --git a/lib/crc32.c b/lib/crc32.c index 45b1d67a1767..4a20455d1f61 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -206,8 +206,8 @@ u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); -u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); -u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); +u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); /* * This multiplies the polynomials x and y modulo the given modulus. 
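The lib/cpumask.c hunk above replaces a literal -1 with NUMA_NO_NODE. A minimal sketch of how a caller might use cpumask_local_spread() when device locality is unknown; the helper name and surrounding code are illustrative, not from this diff:

#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/numa.h>

/*
 * Hypothetical caller: spread per-vector CPU assignments, preferring CPUs
 * on the device's node.  dev_to_node() may return NUMA_NO_NODE, which is
 * the value cpumask_local_spread() now tests for instead of a bare -1.
 */
static void example_spread_vectors(struct device *dev, unsigned int nvec,
				   int *cpu_of_vec)
{
	int node = dev_to_node(dev);
	unsigned int i;

	for (i = 0; i < nvec; i++)
		cpu_of_vec[i] = cpumask_local_spread(i, node);
}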
diff --git a/lib/devres.c b/lib/devres.c index faccf1a037d0..69bed2f38306 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -134,7 +134,6 @@ EXPORT_SYMBOL(devm_iounmap); void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) { resource_size_t size; - const char *name; void __iomem *dest_ptr; BUG_ON(!dev); @@ -145,9 +144,8 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) } size = resource_size(res); - name = res->name ?: dev_name(dev); - if (!devm_request_mem_region(dev, res->start, size, name)) { + if (!devm_request_mem_region(dev, res->start, size, dev_name(dev))) { dev_err(dev, "can't request region for resource %pR\n", res); return IOMEM_ERR_PTR(-EBUSY); } diff --git a/lib/div64.c b/lib/div64.c index 01c8602bb6ff..ee146bb4c558 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -109,7 +109,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -147,7 +147,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index dbf2b457e47e..7bdf98c37e91 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -847,17 +847,19 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, const char *name) { struct ddebug_table *dt; - const char *new_name; dt = kzalloc(sizeof(*dt), GFP_KERNEL); - if (dt == NULL) - return -ENOMEM; - new_name = kstrdup_const(name, GFP_KERNEL); - if (new_name == NULL) { - kfree(dt); + if (dt == NULL) { + pr_err("error adding module: %s\n", name); return -ENOMEM; } - dt->mod_name = new_name; + /* + * For built-in modules, name lives in .rodata and is + * immortal. For loaded modules, name points at the name[] + * member of struct module, which lives at least as long as + * this struct ddebug_table. 
+ */ + dt->mod_name = name; dt->num_ddebugs = n; dt->ddebugs = tab; @@ -868,7 +870,6 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, vpr_info("%u debug prints in module %s\n", n, dt->mod_name); return 0; } -EXPORT_SYMBOL_GPL(ddebug_add_module); /* helper for ddebug_dyndbg_(boot|module)_param_cb */ static int ddebug_dyndbg_param_cb(char *param, char *val, @@ -913,7 +914,6 @@ int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); - kfree_const(dt->mod_name); kfree(dt); } @@ -930,15 +930,15 @@ int ddebug_remove_module(const char *mod_name) mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { - if (!strcmp(dt->mod_name, mod_name)) { + if (dt->mod_name == mod_name) { ddebug_table_free(dt); ret = 0; + break; } } mutex_unlock(&ddebug_lock); return ret; } -EXPORT_SYMBOL_GPL(ddebug_remove_module); static void ddebug_remove_all_tables(void) { diff --git a/lib/flex_array.c b/lib/flex_array.c deleted file mode 100644 index 2eed22fa507c..000000000000 --- a/lib/flex_array.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Flexible array managed in PAGE_SIZE parts - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2009 - * - * Author: Dave Hansen <dave@linux.vnet.ibm.com> - */ - -#include <linux/flex_array.h> -#include <linux/slab.h> -#include <linux/stddef.h> -#include <linux/export.h> -#include <linux/reciprocal_div.h> - -struct flex_array_part { - char elements[FLEX_ARRAY_PART_SIZE]; -}; - -/* - * If a user requests an allocation which is small - * enough, we may simply use the space in the - * flex_array->parts[] array to store the user - * data. - */ -static inline int elements_fit_in_base(struct flex_array *fa) -{ - int data_size = fa->element_size * fa->total_nr_elements; - if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) - return 1; - return 0; -} - -/** - * flex_array_alloc - allocate a new flexible array - * @element_size: the size of individual elements in the array - * @total: total number of elements that this should hold - * @flags: page allocation flags to use for base array - * - * Note: all locking must be provided by the caller. - * - * @total is used to size internal structures. If the user ever - * accesses any array indexes >=@total, it will produce errors. - * - * The maximum number of elements is defined as: the number of - * elements that can be stored in a page times the number of - * page pointers that we can fit in the base structure or (using - * integer math): - * - * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) - * - * Here's a table showing example capacities. Note that the maximum - * index that the get/put() functions is just nr_objects-1. 
This - * basically means that you get 4MB of storage on 32-bit and 2MB on - * 64-bit. - * - * - * Element size | Objects | Objects | - * PAGE_SIZE=4k | 32-bit | 64-bit | - * ---------------------------------| - * 1 bytes | 4177920 | 2088960 | - * 2 bytes | 2088960 | 1044480 | - * 3 bytes | 1392300 | 696150 | - * 4 bytes | 1044480 | 522240 | - * 32 bytes | 130560 | 65408 | - * 33 bytes | 126480 | 63240 | - * 2048 bytes | 2040 | 1020 | - * 2049 bytes | 1020 | 510 | - * void * | 1044480 | 261120 | - * - * Since 64-bit pointers are twice the size, we lose half the - * capacity in the base structure. Also note that no effort is made - * to efficiently pack objects across page boundaries. - */ -struct flex_array *flex_array_alloc(int element_size, unsigned int total, - gfp_t flags) -{ - struct flex_array *ret; - int elems_per_part = 0; - int max_size = 0; - struct reciprocal_value reciprocal_elems = { 0 }; - - if (element_size) { - elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); - reciprocal_elems = reciprocal_value(elems_per_part); - max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; - } - - /* max_size will end up 0 if element_size > PAGE_SIZE */ - if (total > max_size) - return NULL; - ret = kzalloc(sizeof(struct flex_array), flags); - if (!ret) - return NULL; - ret->element_size = element_size; - ret->total_nr_elements = total; - ret->elems_per_part = elems_per_part; - ret->reciprocal_elems = reciprocal_elems; - if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) - memset(&ret->parts[0], FLEX_ARRAY_FREE, - FLEX_ARRAY_BASE_BYTES_LEFT); - return ret; -} -EXPORT_SYMBOL(flex_array_alloc); - -static int fa_element_to_part_nr(struct flex_array *fa, - unsigned int element_nr) -{ - /* - * if element_size == 0 we don't get here, so we never touch - * the zeroed fa->reciprocal_elems, which would yield invalid - * results - */ - return reciprocal_divide(element_nr, fa->reciprocal_elems); -} - -/** - * flex_array_free_parts - just free the second-level pages - * @fa: the flex array from which to free parts - * - * This is to be used in cases where the base 'struct flex_array' - * has been statically allocated and should not be free. - */ -void flex_array_free_parts(struct flex_array *fa) -{ - int part_nr; - - if (elements_fit_in_base(fa)) - return; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) - kfree(fa->parts[part_nr]); -} -EXPORT_SYMBOL(flex_array_free_parts); - -void flex_array_free(struct flex_array *fa) -{ - flex_array_free_parts(fa); - kfree(fa); -} -EXPORT_SYMBOL(flex_array_free); - -static unsigned int index_inside_part(struct flex_array *fa, - unsigned int element_nr, - unsigned int part_nr) -{ - unsigned int part_offset; - - part_offset = element_nr - part_nr * fa->elems_per_part; - return part_offset * fa->element_size; -} - -static struct flex_array_part * -__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) -{ - struct flex_array_part *part = fa->parts[part_nr]; - if (!part) { - part = kmalloc(sizeof(struct flex_array_part), flags); - if (!part) - return NULL; - if (!(flags & __GFP_ZERO)) - memset(part, FLEX_ARRAY_FREE, - sizeof(struct flex_array_part)); - fa->parts[part_nr] = part; - } - return part; -} - -/** - * flex_array_put - copy data into the array at @element_nr - * @fa: the flex array to copy data into - * @element_nr: index of the position in which to insert - * the new element. 
- * @src: address of data to copy into the array - * @flags: page allocation flags to use for array expansion - * - * - * Note that this *copies* the contents of @src into - * the array. If you are trying to store an array of - * pointers, make sure to pass in &ptr instead of ptr. - * You may instead wish to use the flex_array_put_ptr() - * helper function. - * - * Locking must be provided by the caller. - */ -int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, - gfp_t flags) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memcpy(dst, src, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_put); - -/** - * flex_array_clear - clear element in array at @element_nr - * @fa: the flex array of the element. - * @element_nr: index of the position to clear. - * - * Locking must be provided by the caller. - */ -int flex_array_clear(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - void *dst; - - if (element_nr >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return -EINVAL; - } - dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; - memset(dst, FLEX_ARRAY_FREE, fa->element_size); - return 0; -} -EXPORT_SYMBOL(flex_array_clear); - -/** - * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @nr_elements: number of elements for which space is allocated - * @flags: page allocation flags - * - * This will guarantee that no future calls to flex_array_put() - * will allocate memory. It can be used if you are expecting to - * be holding a lock or in some atomic context while writing - * data into the array. - * - * Locking must be provided by the caller. - */ -int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int nr_elements, gfp_t flags) -{ - int start_part; - int end_part; - int part_nr; - unsigned int end; - struct flex_array_part *part; - - if (!start && !nr_elements) - return 0; - if (start >= fa->total_nr_elements) - return -ENOSPC; - if (!nr_elements) - return 0; - - end = start + nr_elements - 1; - - if (end >= fa->total_nr_elements) - return -ENOSPC; - if (!fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return 0; - start_part = fa_element_to_part_nr(fa, start); - end_part = fa_element_to_part_nr(fa, end); - for (part_nr = start_part; part_nr <= end_part; part_nr++) { - part = __fa_get_part(fa, part_nr, flags); - if (!part) - return -ENOMEM; - } - return 0; -} -EXPORT_SYMBOL(flex_array_prealloc); - -/** - * flex_array_get - pull data back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns a pointer to the data at index @element_nr. Note - * that this is a copy of the data that was passed in. 
If you - * are using this to store pointers, you'll get back &ptr. You - * may instead wish to use the flex_array_get_ptr helper. - * - * Locking must be provided by the caller. - */ -void *flex_array_get(struct flex_array *fa, unsigned int element_nr) -{ - int part_nr = 0; - struct flex_array_part *part; - - if (!fa->element_size) - return NULL; - if (element_nr >= fa->total_nr_elements) - return NULL; - if (elements_fit_in_base(fa)) - part = (struct flex_array_part *)&fa->parts[0]; - else { - part_nr = fa_element_to_part_nr(fa, element_nr); - part = fa->parts[part_nr]; - if (!part) - return NULL; - } - return &part->elements[index_inside_part(fa, element_nr, part_nr)]; -} -EXPORT_SYMBOL(flex_array_get); - -/** - * flex_array_get_ptr - pull a ptr back out of the array - * @fa: the flex array from which to extract data - * @element_nr: index of the element to fetch from the array - * - * Returns the pointer placed in the flex array at element_nr using - * flex_array_put_ptr(). This function should not be called if the - * element in question was not set using the _put_ptr() helper. - */ -void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) -{ - void **tmp; - - tmp = flex_array_get(fa, element_nr); - if (!tmp) - return NULL; - - return *tmp; -} -EXPORT_SYMBOL(flex_array_get_ptr); - -static int part_is_free(struct flex_array_part *part) -{ - int i; - - for (i = 0; i < sizeof(struct flex_array_part); i++) - if (part->elements[i] != FLEX_ARRAY_FREE) - return 0; - return 1; -} - -/** - * flex_array_shrink - free unused second-level pages - * @fa: the flex array to shrink - * - * Frees all second-level pages that consist solely of unused - * elements. Returns the number of pages freed. - * - * Locking must be provided by the caller. - */ -int flex_array_shrink(struct flex_array *fa) -{ - struct flex_array_part *part; - int part_nr; - int ret = 0; - - if (!fa->total_nr_elements || !fa->element_size) - return 0; - if (elements_fit_in_base(fa)) - return ret; - for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { - part = fa->parts[part_nr]; - if (!part) - continue; - if (part_is_free(part)) { - fa->parts[part_nr] = NULL; - kfree(part); - ret++; - } - } - return ret; -} -EXPORT_SYMBOL(flex_array_shrink); diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c new file mode 100644 index 000000000000..a7bafc413730 --- /dev/null +++ b/lib/generic-radix-tree.c @@ -0,0 +1,217 @@ + +#include <linux/export.h> +#include <linux/generic-radix-tree.h> +#include <linux/gfp.h> + +#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[PAGE_SIZE]; + }; +}; + +static inline int genradix_depth_shift(unsigned depth) +{ + return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +struct genradix_node 
*genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + +/* + * Returns pointer to the specified byte @offset within @radix, or NULL if not + * allocated + */ +void *__genradix_ptr(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + + if (ilog2(offset) >= genradix_depth_shift(level)) + return NULL; + + while (1) { + if (!n) + return NULL; + if (!level) + break; + + level--; + + n = n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + } + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr); + +/* + * Returns pointer to the specified byte @offset within @radix, allocating it if + * necessary - newly allocated slots are always zeroed out: + */ +void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, + gfp_t gfp_mask) +{ + struct genradix_root *v = READ_ONCE(radix->root); + struct genradix_node *n, *new_node = NULL; + unsigned level; + + /* Increase tree depth if necessary: */ + while (1) { + struct genradix_root *r = v, *new_root; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (n && ilog2(offset) < genradix_depth_shift(level)) + break; + + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + new_node->children[0] = n; + new_root = ((struct genradix_root *) + ((unsigned long) new_node | (n ? level + 1 : 0))); + + if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { + v = new_root; + new_node = NULL; + } + } + + while (level--) { + struct genradix_node **p = + &n->children[offset >> genradix_depth_shift(level)]; + offset &= genradix_depth_size(level) - 1; + + n = READ_ONCE(*p); + if (!n) { + if (!new_node) { + new_node = (void *) + __get_free_page(gfp_mask|__GFP_ZERO); + if (!new_node) + return NULL; + } + + if (!(n = cmpxchg_release(p, NULL, new_node))) + swap(n, new_node); + } + } + + if (new_node) + free_page((unsigned long) new_node); + + return &n->data[offset]; +} +EXPORT_SYMBOL(__genradix_ptr_alloc); + +void *__genradix_iter_peek(struct genradix_iter *iter, + struct __genradix *radix, + size_t objs_per_page) +{ + struct genradix_root *r; + struct genradix_node *n; + unsigned level, i; +restart: + r = READ_ONCE(radix->root); + if (!r) + return NULL; + + n = genradix_root_to_node(r); + level = genradix_root_to_depth(r); + + if (ilog2(iter->offset) >= genradix_depth_shift(level)) + return NULL; + + while (level) { + level--; + + i = (iter->offset >> genradix_depth_shift(level)) & + (GENRADIX_ARY - 1); + + while (!n->children[i]) { + i++; + iter->offset = round_down(iter->offset + + genradix_depth_size(level), + genradix_depth_size(level)); + iter->pos = (iter->offset >> PAGE_SHIFT) * + objs_per_page; + if (i == GENRADIX_ARY) + goto restart; + } + + n = n->children[i]; + } + + return &n->data[iter->offset & (PAGE_SIZE - 1)]; +} +EXPORT_SYMBOL(__genradix_iter_peek); + +static void genradix_free_recurse(struct genradix_node *n, unsigned level) +{ + if (level) { + unsigned i; + + for (i = 0; i < GENRADIX_ARY; i++) + if (n->children[i]) + genradix_free_recurse(n->children[i], level - 1); + } + + free_page((unsigned long) n); +} + +int __genradix_prealloc(struct __genradix *radix, size_t size, + gfp_t gfp_mask) +{ + size_t offset; + + for (offset = 0; offset < size; offset += PAGE_SIZE) + if (!__genradix_ptr_alloc(radix, 
offset, gfp_mask)) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(__genradix_prealloc); + +void __genradix_free(struct __genradix *radix) +{ + struct genradix_root *r = xchg(&radix->root, NULL); + + genradix_free_recurse(genradix_root_to_node(r), + genradix_root_to_depth(r)); +} +EXPORT_SYMBOL(__genradix_free); diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index 14436f4ca6bd..30e0f9770f88 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -52,7 +52,7 @@ u32 int_sqrt64(u64 x) if (x <= ULONG_MAX) return int_sqrt((unsigned long) x); - m = 1ULL << (fls64(x) & ~1ULL); + m = 1ULL << ((fls64(x) - 1) & ~1ULL); while (m != 0) { b = y + m; y >>= 1; diff --git a/lib/iomap.c b/lib/iomap.c index 541d926da95e..e909ab71e995 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -65,8 +65,9 @@ static void bad_io_access(unsigned long port, const char *access) #endif #ifndef mmio_read16be -#define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr)) -#define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr)) +#define mmio_read16be(addr) swab16(readw(addr)) +#define mmio_read32be(addr) swab32(readl(addr)) +#define mmio_read64be(addr) swab64(readq(addr)) #endif unsigned int ioread8(void __iomem *addr) @@ -100,14 +101,89 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef readq +static u64 pio_read64_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = inl(port); + hi = inl(port + sizeof(u32)); + + return lo | (hi << 32); +} + +static u64 pio_read64_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = inl(port + sizeof(u32)); + lo = inl(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = pio_read32be(port + sizeof(u32)); + hi = pio_read32be(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = pio_read32be(port); + lo = pio_read32be(port + sizeof(u32)); + + return lo | (hi << 32); +} + +u64 ioread64_lo_hi(void __iomem *addr) +{ + IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64_hi_lo(void __iomem *addr) +{ + IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64be_lo_hi(void __iomem *addr) +{ + IO_COND(addr, return pio_read64be_lo_hi(port), + return mmio_read64be(addr)); + return 0xffffffffffffffffULL; +} + +u64 ioread64be_hi_lo(void __iomem *addr) +{ + IO_COND(addr, return pio_read64be_hi_lo(port), + return mmio_read64be(addr)); + return 0xffffffffffffffffULL; +} + +EXPORT_SYMBOL(ioread64_lo_hi); +EXPORT_SYMBOL(ioread64_hi_lo); +EXPORT_SYMBOL(ioread64be_lo_hi); +EXPORT_SYMBOL(ioread64be_hi_lo); + +#endif /* readq */ + #ifndef pio_write16be #define pio_write16be(val,port) outw(swab16(val),port) #define pio_write32be(val,port) outl(swab32(val),port) #endif #ifndef mmio_write16be -#define mmio_write16be(val,port) __raw_writew(be16_to_cpu(val),port) -#define mmio_write32be(val,port) __raw_writel(be32_to_cpu(val),port) +#define mmio_write16be(val,port) writew(swab16(val),port) +#define mmio_write32be(val,port) writel(swab32(val),port) +#define mmio_write64be(val,port) writeq(swab64(val),port) #endif void iowrite8(u8 val, void __iomem *addr) @@ -136,6 +212,62 @@ EXPORT_SYMBOL(iowrite16be); EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); +#ifdef writeq +static void pio_write64_lo_hi(u64 val, unsigned long port) +{ + outl(val, port); + outl(val >> 32, port + sizeof(u32)); +} + +static void pio_write64_hi_lo(u64 val, unsigned long 
port) +{ + outl(val >> 32, port + sizeof(u32)); + outl(val, port); +} + +static void pio_write64be_lo_hi(u64 val, unsigned long port) +{ + pio_write32be(val, port + sizeof(u32)); + pio_write32be(val >> 32, port); +} + +static void pio_write64be_hi_lo(u64 val, unsigned long port) +{ + pio_write32be(val >> 32, port); + pio_write32be(val, port + sizeof(u32)); +} + +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64_lo_hi(val, port), + writeq(val, addr)); +} + +void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64_hi_lo(val, port), + writeq(val, addr)); +} + +void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64be_lo_hi(val, port), + mmio_write64be(val, addr)); +} + +void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + IO_COND(addr, pio_write64be_hi_lo(val, port), + mmio_write64be(val, addr)); +} + +EXPORT_SYMBOL(iowrite64_lo_hi); +EXPORT_SYMBOL(iowrite64_hi_lo); +EXPORT_SYMBOL(iowrite64be_lo_hi); +EXPORT_SYMBOL(iowrite64be_hi_lo); + +#endif /* readq */ + /* * These are the "repeat MMIO read/write" functions. * Note the "__raw" accesses, since we don't want to diff --git a/lib/iov_iter.c b/lib/iov_iter.c index be4bd627caf0..ea36dc355da1 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -861,8 +861,21 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache); static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { - struct page *head = compound_head(page); - size_t v = n + offset + page_address(page) - page_address(head); + struct page *head; + size_t v = n + offset; + + /* + * The general case needs to access the page order in order + * to compute the page size. + * However, we mostly deal with order-0 pages and thus can + * avoid a possible cache line miss for requests that fit all + * page orders. 
+ */ + if (n <= v && v <= PAGE_SIZE) + return true; + + head = compound_head(page); + v += (page - head) << PAGE_SHIFT; if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) return true; diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 86a709954f5a..2f17b488d58e 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -35,7 +35,7 @@ void irq_poll_sched(struct irq_poll *iop) local_irq_save(flags); list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_restore(flags); } EXPORT_SYMBOL(irq_poll_sched); diff --git a/lib/kobject.c b/lib/kobject.c index b72e00fd7d09..aa89edcd2b63 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -887,7 +887,7 @@ static void kset_release(struct kobject *kobj) kfree(kset); } -void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +static void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 27c6118afd1c..f05802687ba4 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -200,7 +200,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) r = kobject_action_type(buf, count, &action, &action_args); if (r) { - msg = "unknown uevent action string\n"; + msg = "unknown uevent action string"; goto out; } @@ -212,7 +212,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) r = kobject_action_args(action_args, count - (action_args - buf), &env); if (r == -EINVAL) { - msg = "incorrect uevent action arguments\n"; + msg = "incorrect uevent action arguments"; goto out; } @@ -224,7 +224,7 @@ int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) out: if (r) { devpath = kobject_get_path(kobj, GFP_KERNEL); - printk(KERN_WARNING "synth uevent: %s: %s", + pr_warn("synth uevent: %s: %s\n", devpath ?: "unknown device", msg ?: "failed to send uevent"); kfree(devpath); @@ -765,8 +765,7 @@ static int uevent_net_init(struct net *net) ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg); if (!ue_sk->sk) { - printk(KERN_ERR - "kobject_uevent: unable to create netlink socket!\n"); + pr_err("kobject_uevent: unable to create netlink socket!\n"); kfree(ue_sk); return -ENODEV; } diff --git a/lib/livepatch/Makefile b/lib/livepatch/Makefile new file mode 100644 index 000000000000..26900ddaef82 --- /dev/null +++ b/lib/livepatch/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for livepatch test code. 
+ +obj-$(CONFIG_TEST_LIVEPATCH) += test_klp_atomic_replace.o \ + test_klp_callbacks_demo.o \ + test_klp_callbacks_demo2.o \ + test_klp_callbacks_busy.o \ + test_klp_callbacks_mod.o \ + test_klp_livepatch.o \ + test_klp_shadow_vars.o + +# Target modules to be livepatched require CC_FLAGS_FTRACE +CFLAGS_test_klp_callbacks_busy.o += $(CC_FLAGS_FTRACE) +CFLAGS_test_klp_callbacks_mod.o += $(CC_FLAGS_FTRACE) diff --git a/lib/livepatch/test_klp_atomic_replace.c b/lib/livepatch/test_klp_atomic_replace.c new file mode 100644 index 000000000000..5af7093ca00c --- /dev/null +++ b/lib/livepatch/test_klp_atomic_replace.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int replace; +module_param(replace, int, 0644); +MODULE_PARM_DESC(replace, "replace (default=0)"); + +#include <linux/seq_file.h> +static int livepatch_meminfo_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s: %s\n", THIS_MODULE->name, + "this has been live patched"); + return 0; +} + +static struct klp_func funcs[] = { + { + .old_name = "meminfo_proc_show", + .new_func = livepatch_meminfo_proc_show, + }, {} +}; + +static struct klp_object objs[] = { + { + /* name being NULL means vmlinux */ + .funcs = funcs, + }, {} +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, + /* set .replace in the init function below for demo purposes */ +}; + +static int test_klp_atomic_replace_init(void) +{ + patch.replace = replace; + return klp_enable_patch(&patch); +} + +static void test_klp_atomic_replace_exit(void) +{ +} + +module_init(test_klp_atomic_replace_init); +module_exit(test_klp_atomic_replace_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: atomic replace"); diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c new file mode 100644 index 000000000000..40beddf8a0e2 --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_busy.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/delay.h> + +static int sleep_secs; +module_param(sleep_secs, int, 0644); +MODULE_PARM_DESC(sleep_secs, "sleep_secs (default=0)"); + +static void busymod_work_func(struct work_struct *work); +static DECLARE_DELAYED_WORK(work, busymod_work_func); + +static void busymod_work_func(struct work_struct *work) +{ + pr_info("%s, sleeping %d seconds ...\n", __func__, sleep_secs); + msleep(sleep_secs * 1000); + pr_info("%s exit\n", __func__); +} + +static int test_klp_callbacks_busy_init(void) +{ + pr_info("%s\n", __func__); + schedule_delayed_work(&work, + msecs_to_jiffies(1000 * 0)); + return 0; +} + +static void test_klp_callbacks_busy_exit(void) +{ + cancel_delayed_work_sync(&work); + pr_info("%s\n", __func__); +} + +module_init(test_klp_callbacks_busy_init); +module_exit(test_klp_callbacks_busy_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: busy target module"); diff --git a/lib/livepatch/test_klp_callbacks_demo.c b/lib/livepatch/test_klp_callbacks_demo.c new file mode 100644 index 
000000000000..3fd8fe1cd1cc --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_demo.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int pre_patch_ret; +module_param(pre_patch_ret, int, 0644); +MODULE_PARM_DESC(pre_patch_ret, "pre_patch_ret (default=0)"); + +static const char *const module_state[] = { + [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state", + [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init", + [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away", + [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up", +}; + +static void callback_info(const char *callback, struct klp_object *obj) +{ + if (obj->mod) + pr_info("%s: %s -> %s\n", callback, obj->mod->name, + module_state[obj->mod->state]); + else + pr_info("%s: vmlinux\n", callback); +} + +/* Executed on object patching (ie, patch enablement) */ +static int pre_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); + return pre_patch_ret; +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void pre_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +static void patched_work_func(struct work_struct *work) +{ + pr_info("%s\n", __func__); +} + +static struct klp_func no_funcs[] = { + {} +}; + +static struct klp_func busymod_funcs[] = { + { + .old_name = "busymod_work_func", + .new_func = patched_work_func, + }, {} +}; + +static struct klp_object objs[] = { + { + .name = NULL, /* vmlinux */ + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { + .name = "test_klp_callbacks_mod", + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { + .name = "test_klp_callbacks_busy", + .funcs = busymod_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, +}; + +static int test_klp_callbacks_demo_init(void) +{ + return klp_enable_patch(&patch); +} + +static void test_klp_callbacks_demo_exit(void) +{ +} + +module_init(test_klp_callbacks_demo_init); +module_exit(test_klp_callbacks_demo_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch demo"); diff --git a/lib/livepatch/test_klp_callbacks_demo2.c b/lib/livepatch/test_klp_callbacks_demo2.c new file mode 100644 index 000000000000..5417573e80af --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_demo2.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) 
KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +static int replace; +module_param(replace, int, 0644); +MODULE_PARM_DESC(replace, "replace (default=0)"); + +static const char *const module_state[] = { + [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state", + [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init", + [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away", + [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up", +}; + +static void callback_info(const char *callback, struct klp_object *obj) +{ + if (obj->mod) + pr_info("%s: %s -> %s\n", callback, obj->mod->name, + module_state[obj->mod->state]); + else + pr_info("%s: vmlinux\n", callback); +} + +/* Executed on object patching (ie, patch enablement) */ +static int pre_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); + return 0; +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_patch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void pre_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +/* Executed on object unpatching (ie, patch disablement) */ +static void post_unpatch_callback(struct klp_object *obj) +{ + callback_info(__func__, obj); +} + +static struct klp_func no_funcs[] = { + { } +}; + +static struct klp_object objs[] = { + { + .name = NULL, /* vmlinux */ + .funcs = no_funcs, + .callbacks = { + .pre_patch = pre_patch_callback, + .post_patch = post_patch_callback, + .pre_unpatch = pre_unpatch_callback, + .post_unpatch = post_unpatch_callback, + }, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, + /* set .replace in the init function below for demo purposes */ +}; + +static int test_klp_callbacks_demo2_init(void) +{ + patch.replace = replace; + return klp_enable_patch(&patch); +} + +static void test_klp_callbacks_demo2_exit(void) +{ +} + +module_init(test_klp_callbacks_demo2_init); +module_exit(test_klp_callbacks_demo2_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch demo2"); diff --git a/lib/livepatch/test_klp_callbacks_mod.c b/lib/livepatch/test_klp_callbacks_mod.c new file mode 100644 index 000000000000..8fbe645b1c2c --- /dev/null +++ b/lib/livepatch/test_klp_callbacks_mod.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> + +static int test_klp_callbacks_mod_init(void) +{ + pr_info("%s\n", __func__); + return 0; +} + +static void test_klp_callbacks_mod_exit(void) +{ + pr_info("%s\n", __func__); +} + +module_init(test_klp_callbacks_mod_init); +module_exit(test_klp_callbacks_mod_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: target module"); diff --git a/lib/livepatch/test_klp_livepatch.c b/lib/livepatch/test_klp_livepatch.c new file mode 100644 index 000000000000..aff08199de71 --- /dev/null +++ b/lib/livepatch/test_klp_livepatch.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2014 Seth Jennings <sjenning@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include 
<linux/kernel.h> +#include <linux/livepatch.h> + +#include <linux/seq_file.h> +static int livepatch_cmdline_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s: %s\n", THIS_MODULE->name, + "this has been live patched"); + return 0; +} + +static struct klp_func funcs[] = { + { + .old_name = "cmdline_proc_show", + .new_func = livepatch_cmdline_proc_show, + }, { } +}; + +static struct klp_object objs[] = { + { + /* name being NULL means vmlinux */ + .funcs = funcs, + }, { } +}; + +static struct klp_patch patch = { + .mod = THIS_MODULE, + .objs = objs, +}; + +static int test_klp_livepatch_init(void) +{ + return klp_enable_patch(&patch); +} + +static void test_klp_livepatch_exit(void) +{ +} + +module_init(test_klp_livepatch_init); +module_exit(test_klp_livepatch_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_AUTHOR("Seth Jennings <sjenning@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: livepatch module"); diff --git a/lib/livepatch/test_klp_shadow_vars.c b/lib/livepatch/test_klp_shadow_vars.c new file mode 100644 index 000000000000..fe5c413efe96 --- /dev/null +++ b/lib/livepatch/test_klp_shadow_vars.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/livepatch.h> +#include <linux/slab.h> + +/* + * Keep a small list of pointers so that we can print address-agnostic + * pointer values. Use a rolling integer count to differentiate the values. + * Ironically we could have used the shadow variable API to do this, but + * let's not lean too heavily on the very code we're testing. + */ +static LIST_HEAD(ptr_list); +struct shadow_ptr { + void *ptr; + int id; + struct list_head list; +}; + +static void free_ptr_list(void) +{ + struct shadow_ptr *sp, *tmp_sp; + + list_for_each_entry_safe(sp, tmp_sp, &ptr_list, list) { + list_del(&sp->list); + kfree(sp); + } +} + +static int ptr_id(void *ptr) +{ + struct shadow_ptr *sp; + static int count; + + list_for_each_entry(sp, &ptr_list, list) { + if (sp->ptr == ptr) + return sp->id; + } + + sp = kmalloc(sizeof(*sp), GFP_ATOMIC); + if (!sp) + return -ENOMEM; + sp->ptr = ptr; + sp->id = count++; + + list_add(&sp->list, &ptr_list); + + return sp->id; +} + +/* + * Shadow variable wrapper functions that echo the function and arguments + * to the kernel log for testing verification. Don't display raw pointers, + * but use the ptr_id() value instead. 
+ */ +static void *shadow_get(void *obj, unsigned long id) +{ + void *ret = klp_shadow_get(obj, id); + + pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n", + __func__, ptr_id(obj), id, ptr_id(ret)); + + return ret; +} + +static void *shadow_alloc(void *obj, unsigned long id, size_t size, + gfp_t gfp_flags, klp_shadow_ctor_t ctor, + void *ctor_data) +{ + void *ret = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, + ctor_data); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", + __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), + ptr_id(ctor_data), ptr_id(ret)); + return ret; +} + +static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size, + gfp_t gfp_flags, klp_shadow_ctor_t ctor, + void *ctor_data) +{ + void *ret = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, + ctor_data); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n", + __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor), + ptr_id(ctor_data), ptr_id(ret)); + return ret; +} + +static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor) +{ + klp_shadow_free(obj, id, dtor); + pr_info("klp_%s(obj=PTR%d, id=0x%lx, dtor=PTR%d)\n", + __func__, ptr_id(obj), id, ptr_id(dtor)); +} + +static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor) +{ + klp_shadow_free_all(id, dtor); + pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", + __func__, id, ptr_id(dtor)); +} + + +/* Shadow variable constructor - remember simple pointer data */ +static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data) +{ + int **shadow_int = shadow_data; + *shadow_int = ctor_data; + pr_info("%s: PTR%d -> PTR%d\n", + __func__, ptr_id(shadow_int), ptr_id(ctor_data)); + + return 0; +} + +static void shadow_dtor(void *obj, void *shadow_data) +{ + pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n", + __func__, ptr_id(obj), ptr_id(shadow_data)); +} + +static int test_klp_shadow_vars_init(void) +{ + void *obj = THIS_MODULE; + int id = 0x1234; + size_t size = sizeof(int *); + gfp_t gfp_flags = GFP_KERNEL; + + int var1, var2, var3, var4; + int **sv1, **sv2, **sv3, **sv4; + + void *ret; + + ptr_id(NULL); + ptr_id(&var1); + ptr_id(&var2); + ptr_id(&var3); + ptr_id(&var4); + + /* + * With an empty shadow variable hash table, expect not to find + * any matches. + */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + /* + * Allocate a few shadow variables with different <obj> and <id>. + */ + sv1 = shadow_alloc(obj, id, size, gfp_flags, shadow_ctor, &var1); + if (!sv1) + return -ENOMEM; + + sv2 = shadow_alloc(obj + 1, id, size, gfp_flags, shadow_ctor, &var2); + if (!sv2) + return -ENOMEM; + + sv3 = shadow_alloc(obj, id + 1, size, gfp_flags, shadow_ctor, &var3); + if (!sv3) + return -ENOMEM; + + /* + * Verify we can find our new shadow variables and that they point + * to expected data. 
+ */ + ret = shadow_get(obj, id); + if (!ret) + return -EINVAL; + if (ret == sv1 && *sv1 == &var1) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv1), ptr_id(*sv1)); + + ret = shadow_get(obj + 1, id); + if (!ret) + return -EINVAL; + if (ret == sv2 && *sv2 == &var2) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv2), ptr_id(*sv2)); + ret = shadow_get(obj, id + 1); + if (!ret) + return -EINVAL; + if (ret == sv3 && *sv3 == &var3) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv3), ptr_id(*sv3)); + + /* + * Allocate or get a few more, this time with the same <obj>, <id>. + * The second invocation should return the same shadow var. + */ + sv4 = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); + if (!sv4) + return -ENOMEM; + + ret = shadow_get_or_alloc(obj + 2, id, size, gfp_flags, shadow_ctor, &var4); + if (!ret) + return -EINVAL; + if (ret == sv4 && *sv4 == &var4) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv4), ptr_id(*sv4)); + + /* + * Free the <obj=*, id> shadow variables and check that we can no + * longer find them. + */ + shadow_free(obj, id, shadow_dtor); /* sv1 */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + shadow_free(obj + 1, id, shadow_dtor); /* sv2 */ + ret = shadow_get(obj + 1, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + shadow_free(obj + 2, id, shadow_dtor); /* sv4 */ + ret = shadow_get(obj + 2, id); + if (!ret) + pr_info(" got expected NULL result\n"); + + /* + * We should still find an <id+1> variable. + */ + ret = shadow_get(obj, id + 1); + if (!ret) + return -EINVAL; + if (ret == sv3 && *sv3 == &var3) + pr_info(" got expected PTR%d -> PTR%d result\n", + ptr_id(sv3), ptr_id(*sv3)); + + /* + * Free all the <id+1> variables, too. + */ + shadow_free_all(id + 1, shadow_dtor); /* sv3 */ + ret = shadow_get(obj, id); + if (!ret) + pr_info(" shadow_get() got expected NULL result\n"); + + + free_ptr_list(); + + return 0; +} + +static void test_klp_shadow_vars_exit(void) +{ +} + +module_init(test_klp_shadow_vars_init); +module_exit(test_klp_shadow_vars_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>"); +MODULE_DESCRIPTION("Livepatch test: shadow variables"); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 1e1bbf171eca..a1705545e6ac 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -1989,6 +1989,7 @@ void locking_selftest(void) init_shared_classes(); debug_locks_silent = !debug_locks_verbose; + lockdep_set_selftest_task(current); DO_TESTCASE_6R("A-A deadlock", AA); DO_TESTCASE_6R("A-B-B-A deadlock", ABBA); @@ -2097,5 +2098,6 @@ void locking_selftest(void) printk("---------------------------------\n"); debug_locks = 1; } + lockdep_set_selftest_task(NULL); debug_locks_silent = 0; } diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 236eb21167b5..4525fb094844 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -20,7 +20,8 @@ static noinline size_t lzo1x_1_do_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - size_t ti, void *wrkmem) + size_t ti, void *wrkmem, signed char *state_offset, + const unsigned char bitstream_version) { const unsigned char *ip; unsigned char *op; @@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len, ip += ti < 4 ? 
4 - ti : 0; for (;;) { - const unsigned char *m_pos; + const unsigned char *m_pos = NULL; size_t t, m_len, m_off; u32 dv; + u32 run_length = 0; literal: ip += 1 + ((ip - ii) >> 5); next: if (unlikely(ip >= ip_end)) break; dv = get_unaligned_le32(ip); - t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; - m_pos = in + dict[t]; - dict[t] = (lzo_dict_t) (ip - in); - if (unlikely(dv != get_unaligned_le32(m_pos))) - goto literal; + + if (dv == 0 && bitstream_version) { + const unsigned char *ir = ip + 4; + const unsigned char *limit = ip_end + < (ip + MAX_ZERO_RUN_LENGTH + 1) + ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ + defined(LZO_FAST_64BIT_MEMORY_ACCESS) + u64 dv64; + + for (; (ir + 32) <= limit; ir += 32) { + dv64 = get_unaligned((u64 *)ir); + dv64 |= get_unaligned((u64 *)ir + 1); + dv64 |= get_unaligned((u64 *)ir + 2); + dv64 |= get_unaligned((u64 *)ir + 3); + if (dv64) + break; + } + for (; (ir + 8) <= limit; ir += 8) { + dv64 = get_unaligned((u64 *)ir); + if (dv64) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctzll(dv64) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clzll(dv64) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#else + while ((ir < (const unsigned char *) + ALIGN((uintptr_t)ir, 4)) && + (ir < limit) && (*ir == 0)) + ir++; + for (; (ir + 4) <= limit; ir += 4) { + dv = *((u32 *)ir); + if (dv) { +# if defined(__LITTLE_ENDIAN) + ir += __builtin_ctz(dv) >> 3; +# elif defined(__BIG_ENDIAN) + ir += __builtin_clz(dv) >> 3; +# else +# error "missing endian definition" +# endif + break; + } + } +#endif + while (likely(ir < limit) && unlikely(*ir == 0)) + ir++; + run_length = ir - ip; + if (run_length > MAX_ZERO_RUN_LENGTH) + run_length = MAX_ZERO_RUN_LENGTH; + } else { + t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; + m_pos = in + dict[t]; + dict[t] = (lzo_dict_t) (ip - in); + if (unlikely(dv != get_unaligned_le32(m_pos))) + goto literal; + } ii -= ti; ti = 0; t = ip - ii; if (t != 0) { if (t <= 3) { - op[-2] |= t; + op[*state_offset] |= t; COPY4(op, ii); op += t; } else if (t <= 16) { @@ -88,6 +147,17 @@ next: } } + if (unlikely(run_length)) { + ip += run_length; + run_length -= MIN_ZERO_RUN_LENGTH; + put_unaligned_le32((run_length << 21) | 0xfffc18 + | (run_length & 0x7), op); + op += 4; + run_length = 0; + *state_offset = -3; + goto finished_writing_instruction; + } + m_len = 4; { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) @@ -170,7 +240,6 @@ m_len_done: m_off = ip - m_pos; ip += m_len; - ii = ip; if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { m_off -= 1; *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); @@ -207,29 +276,45 @@ m_len_done: *op++ = (m_off << 2); *op++ = (m_off >> 6); } + *state_offset = -2; +finished_writing_instruction: + ii = ip; goto next; } *out_len = op - out; return in_end - (ii - ti); } -int lzo1x_1_compress(const unsigned char *in, size_t in_len, +int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len, - void *wrkmem) + void *wrkmem, const unsigned char bitstream_version) { const unsigned char *ip = in; unsigned char *op = out; size_t l = in_len; size_t t = 0; + signed char state_offset = -2; + unsigned int m4_max_offset; + + // LZO v0 will never write 17 as first byte, + // so this is used to version the bitstream + if (bitstream_version > 0) { + *op++ = 17; + *op++ = bitstream_version; + m4_max_offset = M4_MAX_OFFSET_V1; + } else { + m4_max_offset = 
M4_MAX_OFFSET_V0; + } while (l > 20) { - size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); + size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); uintptr_t ll_end = (uintptr_t) ip + ll; if ((ll_end + ((t + ll) >> 5)) <= ll_end) break; BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); - t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem); + t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, + &state_offset, bitstream_version); ip += ll; op += *out_len; l -= ll; @@ -242,7 +327,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, if (op == out && t <= 238) { *op++ = (17 + t); } else if (t <= 3) { - op[-2] |= t; + op[state_offset] |= t; } else if (t <= 18) { *op++ = (t - 3); } else { @@ -273,7 +358,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len, *out_len = op - out; return LZO_E_OK; } + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); +} + +int lzorle1x_1_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, + void *wrkmem) +{ + return lzogeneric1x_1_compress(in, in_len, out, out_len, + wrkmem, LZO_VERSION); +} + EXPORT_SYMBOL_GPL(lzo1x_1_compress); +EXPORT_SYMBOL_GPL(lzorle1x_1_compress); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X-1 Compressor"); diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index a1c387f6afba..6d2600ea3b55 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -46,11 +46,23 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, const unsigned char * const ip_end = in + in_len; unsigned char * const op_end = out + *out_len; + unsigned char bitstream_version; + op = out; ip = in; if (unlikely(in_len < 3)) goto input_overrun; + + if (likely(*ip == 17)) { + bitstream_version = ip[1]; + ip += 2; + if (unlikely(in_len < 5)) + goto input_overrun; + } else { + bitstream_version = 0; + } + if (*ip > 17) { t = *ip++ - 17; if (t < 4) { @@ -154,32 +166,49 @@ copy_literal_run: m_pos -= next >> 2; next &= 3; } else { - m_pos = op; - m_pos -= (t & 8) << 11; - t = (t & 7) + (3 - 1); - if (unlikely(t == 2)) { - size_t offset; - const unsigned char *ip_last = ip; + NEED_IP(2); + next = get_unaligned_le16(ip); + if (((next & 0xfffc) == 0xfffc) && + ((t & 0xf8) == 0x18) && + likely(bitstream_version)) { + NEED_IP(3); + t &= 7; + t |= ip[2] << 3; + t += MIN_ZERO_RUN_LENGTH; + NEED_OP(t); + memset(op, 0, t); + op += t; + next &= 3; + ip += 3; + goto match_next; + } else { + m_pos = op; + m_pos -= (t & 8) << 11; + t = (t & 7) + (3 - 1); + if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; - while (unlikely(*ip == 0)) { - ip++; - NEED_IP(1); - } - offset = ip - ip_last; - if (unlikely(offset > MAX_255_COUNT)) - return LZO_E_ERROR; + while (unlikely(*ip == 0)) { + ip++; + NEED_IP(1); + } + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; - offset = (offset << 8) - offset; - t += offset + 7 + *ip++; - NEED_IP(2); + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; + NEED_IP(2); + next = get_unaligned_le16(ip); + } + ip += 2; + m_pos -= next >> 2; + next &= 3; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; } - next = get_unaligned_le16(ip); - ip += 2; - m_pos -= next >> 2; - next &= 3; - if (m_pos == op) - goto eof_found; - m_pos -= 
0x4000; } TEST_LB(m_pos); #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h index 4edefd2f540c..b60851fcf6ce 100644 --- a/lib/lzo/lzodefs.h +++ b/lib/lzo/lzodefs.h @@ -13,9 +13,15 @@ */ +/* Version + * 0: original lzo version + * 1: lzo with support for RLE + */ +#define LZO_VERSION 1 + #define COPY4(dst, src) \ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) -#if defined(__x86_64__) +#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define COPY8(dst, src) \ put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst)) #else @@ -25,19 +31,21 @@ #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN) #error "conflicting endian definitions" -#elif defined(__x86_64__) +#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64) #define LZO_USE_CTZ64 1 #define LZO_USE_CTZ32 1 -#elif defined(__i386__) || defined(__powerpc__) +#define LZO_FAST_64BIT_MEMORY_ACCESS +#elif defined(CONFIG_X86) || defined(CONFIG_PPC) #define LZO_USE_CTZ32 1 -#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5) +#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5) #define LZO_USE_CTZ32 1 #endif #define M1_MAX_OFFSET 0x0400 #define M2_MAX_OFFSET 0x0800 #define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff +#define M4_MAX_OFFSET_V0 0xbfff +#define M4_MAX_OFFSET_V1 0xbffe #define M1_MIN_LEN 2 #define M1_MAX_LEN 2 @@ -53,6 +61,9 @@ #define M3_MARKER 32 #define M4_MARKER 16 +#define MIN_ZERO_RUN_LENGTH 4 +#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH) + #define lzo_dict_t unsigned short #define D_BITS 13 #define D_SIZE (1u << D_BITS) diff --git a/lib/objagg.c b/lib/objagg.c index c9b457a91153..576be22e86de 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/rhashtable.h> +#include <linux/idr.h> #include <linux/list.h> #include <linux/sort.h> #include <linux/objagg.h> @@ -11,6 +12,34 @@ #define CREATE_TRACE_POINTS #include <trace/events/objagg.h> +struct objagg_hints { + struct rhashtable node_ht; + struct rhashtable_params ht_params; + struct list_head node_list; + unsigned int node_count; + unsigned int root_count; + unsigned int refcount; + const struct objagg_ops *ops; +}; + +struct objagg_hints_node { + struct rhash_head ht_node; /* member of objagg_hints->node_ht */ + struct list_head list; /* member of objagg_hints->node_list */ + struct objagg_hints_node *parent; + unsigned int root_id; + struct objagg_obj_stats_info stats_info; + unsigned long obj[0]; +}; + +static struct objagg_hints_node * +objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj) +{ + if (!objagg_hints) + return NULL; + return rhashtable_lookup_fast(&objagg_hints->node_ht, obj, + objagg_hints->ht_params); +} + struct objagg { const struct objagg_ops *ops; void *priv; @@ -18,6 +47,8 @@ struct objagg { struct rhashtable_params ht_params; struct list_head obj_list; unsigned int obj_count; + struct ida root_ida; + struct objagg_hints *hints; }; struct objagg_obj { @@ -30,6 +61,7 @@ struct objagg_obj { void *delta_priv; /* user delta private */ void *root_priv; /* user root private */ }; + unsigned int root_id; unsigned int refcount; /* counts number of users of this object * including nested objects */ @@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj) static int objagg_obj_parent_assign(struct objagg *objagg, struct objagg_obj *objagg_obj, - struct objagg_obj *parent) + struct objagg_obj *parent, + bool take_parent_ref) { void *delta_priv; @@ 
-144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg, */ objagg_obj->parent = parent; objagg_obj->delta_priv = delta_priv; - objagg_obj_ref_inc(objagg_obj->parent); + if (take_parent_ref) + objagg_obj_ref_inc(objagg_obj->parent); trace_objagg_obj_parent_assign(objagg, objagg_obj, parent, parent->refcount); @@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg, if (!objagg_obj_is_root(objagg_obj_cur)) continue; err = objagg_obj_parent_assign(objagg, objagg_obj, - objagg_obj_cur); + objagg_obj_cur, true); if (!err) return 0; } @@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg, __objagg_obj_put(objagg, objagg_obj->parent); } +static int objagg_obj_root_id_alloc(struct objagg *objagg, + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) +{ + unsigned int min, max; + int root_id; + + /* In case there are no hints available, the root id is invalid. */ + if (!objagg->hints) { + objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID; + return 0; + } + + if (hnode) { + min = hnode->root_id; + max = hnode->root_id; + } else { + /* For objects with no hint, start after the last + * hinted root_id. + */ + min = objagg->hints->root_count; + max = ~0; + } + + root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL); + + if (root_id < 0) + return root_id; + objagg_obj->root_id = root_id; + return 0; +} + +static void objagg_obj_root_id_free(struct objagg *objagg, + struct objagg_obj *objagg_obj) +{ + if (!objagg->hints) + return; + ida_free(&objagg->root_ida, objagg_obj->root_id); +} + static int objagg_obj_root_create(struct objagg *objagg, - struct objagg_obj *objagg_obj) + struct objagg_obj *objagg_obj, + struct objagg_hints_node *hnode) { - objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, - objagg_obj->obj); - if (IS_ERR(objagg_obj->root_priv)) - return PTR_ERR(objagg_obj->root_priv); + int err; + err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode); + if (err) + return err; + objagg_obj->root_priv = objagg->ops->root_create(objagg->priv, + objagg_obj->obj, + objagg_obj->root_id); + if (IS_ERR(objagg_obj->root_priv)) { + err = PTR_ERR(objagg_obj->root_priv); + goto err_root_create; + } trace_objagg_obj_root_create(objagg, objagg_obj); return 0; + +err_root_create: + objagg_obj_root_id_free(objagg, objagg_obj); + return err; } static void objagg_obj_root_destroy(struct objagg *objagg, @@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg, { trace_objagg_obj_root_destroy(objagg, objagg_obj); objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv); + objagg_obj_root_id_free(objagg, objagg_obj); +} + +static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj); + +static int objagg_obj_init_with_hints(struct objagg *objagg, + struct objagg_obj *objagg_obj, + bool *hint_found) +{ + struct objagg_hints_node *hnode; + struct objagg_obj *parent; + int err; + + hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj); + if (!hnode) { + *hint_found = false; + return 0; + } + *hint_found = true; + + if (!hnode->parent) + return objagg_obj_root_create(objagg, objagg_obj, hnode); + + parent = __objagg_obj_get(objagg, hnode->parent->obj); + if (IS_ERR(parent)) + return PTR_ERR(parent); + + err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false); + if (err) { + *hint_found = false; + err = 0; + goto err_parent_assign; + } + + return 0; + +err_parent_assign: + objagg_obj_put(objagg, parent); + return err; } static int 
objagg_obj_init(struct objagg *objagg, struct objagg_obj *objagg_obj) { + bool hint_found; int err; + /* First, try to use hints if they are available and + * if they provide result. + */ + err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found); + if (err) + return err; + + if (hint_found) + return 0; + /* Try to find if the object can be aggregated under an existing one. */ err = objagg_obj_parent_lookup_assign(objagg, objagg_obj); if (!err) return 0; /* If aggregation is not possible, make the object a root. */ - return objagg_obj_root_create(objagg, objagg_obj); + return objagg_obj_root_create(objagg, objagg_obj, NULL); } static void objagg_obj_fini(struct objagg *objagg, @@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put); /** * objagg_create - creates a new objagg instance - * @ops: user-specific callbacks - * @priv: pointer to a private data passed to the ops + * @ops: user-specific callbacks + * @objagg_hints: hints, can be NULL + * @priv: pointer to a private data passed to the ops * * Note: all locking must be provided by the caller. * @@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put); * Returns a pointer to newly created objagg instance in case of success, * otherwise it returns pointer error using ERR_PTR macro. */ -struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) +struct objagg *objagg_create(const struct objagg_ops *ops, + struct objagg_hints *objagg_hints, void *priv) { struct objagg *objagg; int err; if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy || - !ops->delta_create || !ops->delta_destroy)) + !ops->delta_check || !ops->delta_create || + !ops->delta_destroy)) return ERR_PTR(-EINVAL); + objagg = kzalloc(sizeof(*objagg), GFP_KERNEL); if (!objagg) return ERR_PTR(-ENOMEM); objagg->ops = ops; + if (objagg_hints) { + objagg->hints = objagg_hints; + objagg_hints->refcount++; + } objagg->priv = priv; INIT_LIST_HEAD(&objagg->obj_list); @@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv) if (err) goto err_rhashtable_init; + ida_init(&objagg->root_ida); + trace_objagg_create(objagg); return objagg; @@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create); void objagg_destroy(struct objagg *objagg) { trace_objagg_destroy(objagg); + ida_destroy(&objagg->root_ida); WARN_ON(!list_empty(&objagg->obj_list)); rhashtable_destroy(&objagg->obj_ht); + if (objagg->hints) + objagg_hints_put(objagg->hints); kfree(objagg); } EXPORT_SYMBOL(objagg_destroy); @@ -472,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) objagg_stats->stats_info[i].objagg_obj = objagg_obj; objagg_stats->stats_info[i].is_root = objagg_obj_is_root(objagg_obj); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; i++; } objagg_stats->stats_info_count = i; @@ -485,7 +636,7 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg) EXPORT_SYMBOL(objagg_stats_get); /** - * objagg_stats_puts - puts stats of the objagg instance + * objagg_stats_put - puts stats of the objagg instance * @objagg_stats: objagg instance stats * * Note: all locking must be provided by the caller. 
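An aside on the IDA usage introduced above for root IDs: ida_alloc_range() hands out an unused ID inside [min, max], so passing min == max pins a hinted root to its precomputed ID, while unhinted roots draw from the range that starts after the hinted block. A standalone sketch of that scheme (names hypothetical):

#include <linux/idr.h>

static DEFINE_IDA(example_root_ida);

static int pick_root_id(bool hinted, unsigned int hinted_id,
			unsigned int hinted_root_count)
{
	if (hinted)
		/* min == max: either we get exactly this ID or an error */
		return ida_alloc_range(&example_root_ida, hinted_id,
				       hinted_id, GFP_KERNEL);

	/* no hint: allocate anything past the hinted IDs */
	return ida_alloc_range(&example_root_ida, hinted_root_count,
			       ~0, GFP_KERNEL);
}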
@@ -496,6 +647,410 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats) } EXPORT_SYMBOL(objagg_stats_put); +static struct objagg_hints_node * +objagg_hints_node_create(struct objagg_hints *objagg_hints, + struct objagg_obj *objagg_obj, size_t obj_size, + struct objagg_hints_node *parent_hnode) +{ + unsigned int user_count = objagg_obj->stats.user_count; + struct objagg_hints_node *hnode; + int err; + + hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL); + if (!hnode) + return ERR_PTR(-ENOMEM); + memcpy(hnode->obj, &objagg_obj->obj, obj_size); + hnode->stats_info.stats.user_count = user_count; + hnode->stats_info.stats.delta_user_count = user_count; + if (parent_hnode) { + parent_hnode->stats_info.stats.delta_user_count += user_count; + } else { + hnode->root_id = objagg_hints->root_count++; + hnode->stats_info.is_root = true; + } + hnode->stats_info.objagg_obj = objagg_obj; + + err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + if (err) + goto err_ht_insert; + + list_add(&hnode->list, &objagg_hints->node_list); + hnode->parent = parent_hnode; + objagg_hints->node_count++; + + return hnode; + +err_ht_insert: + kfree(hnode); + return ERR_PTR(err); +} + +static void objagg_hints_flush(struct objagg_hints *objagg_hints) +{ + struct objagg_hints_node *hnode, *tmp; + + list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) { + list_del(&hnode->list); + rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node, + objagg_hints->ht_params); + kfree(hnode); + } +} + +struct objagg_tmp_node { + struct objagg_obj *objagg_obj; + bool crossed_out; +}; + +struct objagg_tmp_graph { + struct objagg_tmp_node *nodes; + unsigned long nodes_count; + unsigned long *edges; +}; + +static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + return index * graph->nodes_count + parent_index; +} + +static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + __set_bit(edge_index, graph->edges); +} + +static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph, + int parent_index, int index) +{ + int edge_index = objagg_tmp_graph_edge_index(graph, index, + parent_index); + + return test_bit(edge_index, graph->edges); +} + +static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph, + unsigned int index) +{ + struct objagg_tmp_node *node = &graph->nodes[index]; + unsigned int weight = node->objagg_obj->stats.user_count; + int j; + + /* Node weight is sum of node users and all other nodes users + * that this node can represent with delta. 
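A quick note on the edge bitmap used by the helpers just above: the N x N "node can be a delta of parent" relation is packed into a single bitmap, and the only real requirement is that the setter and the tester agree on the same (parent, node) to bit mapping, which the two objagg helpers do. A generic sketch of the same layout:

#include <linux/bitmap.h>
#include <linux/slab.h>

static unsigned long *adj_alloc(unsigned int n)
{
	/* one bit per ordered (parent, node) pair */
	return kcalloc(BITS_TO_LONGS(n * n), sizeof(unsigned long),
		       GFP_KERNEL);
}

static void adj_set(unsigned long *adj, unsigned int n,
		    unsigned int parent, unsigned int node)
{
	__set_bit(parent * n + node, adj);
}

static bool adj_test(const unsigned long *adj, unsigned int n,
		     unsigned int parent, unsigned int node)
{
	return test_bit(parent * n + node, adj);
}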
+ */ + + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + weight += node->objagg_obj->stats.user_count; + } + return weight; +} + +static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph) +{ + struct objagg_tmp_node *node; + unsigned int max_weight = 0; + unsigned int weight; + int max_index = -1; + int i; + + for (i = 0; i < graph->nodes_count; i++) { + node = &graph->nodes[i]; + if (node->crossed_out) + continue; + weight = objagg_tmp_graph_node_weight(graph, i); + if (weight >= max_weight) { + max_weight = weight; + max_index = i; + } + } + return max_index; +} + +static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg) +{ + unsigned int nodes_count = objagg->obj_count; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + struct objagg_tmp_node *pnode; + struct objagg_obj *objagg_obj; + size_t alloc_size; + int i, j; + + graph = kzalloc(sizeof(*graph), GFP_KERNEL); + if (!graph) + return NULL; + + graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL); + if (!graph->nodes) + goto err_nodes_alloc; + graph->nodes_count = nodes_count; + + alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) * + sizeof(unsigned long); + graph->edges = kzalloc(alloc_size, GFP_KERNEL); + if (!graph->edges) + goto err_edges_alloc; + + i = 0; + list_for_each_entry(objagg_obj, &objagg->obj_list, list) { + node = &graph->nodes[i++]; + node->objagg_obj = objagg_obj; + } + + /* Assemble a temporary graph. Insert edge X->Y in case Y can be + * in delta of X. + */ + for (i = 0; i < nodes_count; i++) { + for (j = 0; j < nodes_count; j++) { + if (i == j) + continue; + pnode = &graph->nodes[i]; + node = &graph->nodes[j]; + if (objagg->ops->delta_check(objagg->priv, + pnode->objagg_obj->obj, + node->objagg_obj->obj)) { + objagg_tmp_graph_edge_set(graph, i, j); + + } + } + } + return graph; + +err_edges_alloc: + kfree(graph->nodes); +err_nodes_alloc: + kfree(graph); + return NULL; +} + +static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph) +{ + kfree(graph->edges); + kfree(graph->nodes); + kfree(graph); +} + +static int +objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints, + struct objagg *objagg) +{ + struct objagg_hints_node *hnode, *parent_hnode; + struct objagg_tmp_graph *graph; + struct objagg_tmp_node *node; + int index; + int j; + int err; + + graph = objagg_tmp_graph_create(objagg); + if (!graph) + return -ENOMEM; + + /* Find the nodes from the ones that can accommodate most users + * and cross them out of the graph. Save them to the hint list. 
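Worked through on the hints_case data used by test_objagg later in this diff (key IDs 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8 with MAX_KEY_ID_DIFF of 5): key 3 has 2 users and can cover keys 5, 6, 7 and 8 as deltas, giving it the top weight of 2 + 2 + 1 + 1 + 3 = 9, so it becomes the first root and those four keys are crossed out as its children (DELTA(8, 3), DELTA(5, 2), DELTA(6, 1), DELTA(7, 1)). Of what remains, key 1 covers only itself (weight 2) and key 30 stands alone (weight 1), so they become the second and third roots. That is exactly the ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1) layout in the test's expect_stats_hints.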
+ */ + while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) { + node = &graph->nodes[index]; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + NULL); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + parent_hnode = hnode; + for (j = 0; j < graph->nodes_count; j++) { + if (!objagg_tmp_graph_is_edge(graph, index, j)) + continue; + node = &graph->nodes[j]; + if (node->crossed_out) + continue; + node->crossed_out = true; + hnode = objagg_hints_node_create(objagg_hints, + node->objagg_obj, + objagg->ops->obj_size, + parent_hnode); + if (IS_ERR(hnode)) { + err = PTR_ERR(hnode); + goto out; + } + } + } + + err = 0; +out: + objagg_tmp_graph_destroy(graph); + return err; +} + +struct objagg_opt_algo { + int (*fillup_hints)(struct objagg_hints *objagg_hints, + struct objagg *objagg); +}; + +static const struct objagg_opt_algo objagg_opt_simple_greedy = { + .fillup_hints = objagg_opt_simple_greedy_fillup_hints, +}; + + +static const struct objagg_opt_algo *objagg_opt_algos[] = { + [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, +}; + +static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + struct objagg_hints *objagg_hints = + container_of(ht, struct objagg_hints, node_ht); + const struct objagg_ops *ops = objagg_hints->ops; + const char *ptr = obj; + + ptr += ht->p.key_offset; + return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : + memcmp(ptr, arg->key, ht->p.key_len); +} + +/** + * objagg_hints_get - obtains hints instance + * @objagg: objagg instance + * @opt_algo_type: type of hints finding algorithm + * + * Note: all locking must be provided by the caller. + * + * According to the algo type, the existing objects of objagg instance + * are going to be went-through to assemble an optimal tree. We call this + * tree hints. These hints can be later on used for creation of + * a new objagg instance. There, the future object creations are going + * to be consulted with these hints in order to find out, where exactly + * the new object should be put as a root or delta. + * + * Returns a pointer to hints instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. 
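Putting the pieces together, the intended calling sequence for the API documented above is roughly the following (a sketch; ops, priv and the surrounding error strategy belong to the caller):

#include <linux/err.h>
#include <linux/objagg.h>

static struct objagg *recreate_with_hints(struct objagg *old,
					  const struct objagg_ops *ops,
					  void *priv)
{
	struct objagg_hints *hints;
	struct objagg *new;

	hints = objagg_hints_get(old, OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
	if (IS_ERR(hints))
		return ERR_CAST(hints);

	new = objagg_create(ops, hints, priv);
	/* objagg_create() takes its own reference on the hints and
	 * objagg_destroy() drops it, so the local reference can go now */
	objagg_hints_put(hints);
	return new;
}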
+ */ +struct objagg_hints *objagg_hints_get(struct objagg *objagg, + enum objagg_opt_algo_type opt_algo_type) +{ + const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type]; + struct objagg_hints *objagg_hints; + int err; + + objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL); + if (!objagg_hints) + return ERR_PTR(-ENOMEM); + + objagg_hints->ops = objagg->ops; + objagg_hints->refcount = 1; + + INIT_LIST_HEAD(&objagg_hints->node_list); + + objagg_hints->ht_params.key_len = objagg->ops->obj_size; + objagg_hints->ht_params.key_offset = + offsetof(struct objagg_hints_node, obj); + objagg_hints->ht_params.head_offset = + offsetof(struct objagg_hints_node, ht_node); + objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; + + err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); + if (err) + goto err_rhashtable_init; + + err = algo->fillup_hints(objagg_hints, objagg); + if (err) + goto err_fillup_hints; + + if (WARN_ON(objagg_hints->node_count != objagg->obj_count)) { + err = -EINVAL; + goto err_node_count_check; + } + + return objagg_hints; + +err_node_count_check: +err_fillup_hints: + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); +err_rhashtable_init: + kfree(objagg_hints); + return ERR_PTR(err); +} +EXPORT_SYMBOL(objagg_hints_get); + +/** + * objagg_hints_put - puts hints instance + * @objagg_hints: objagg hints instance + * + * Note: all locking must be provided by the caller. + */ +void objagg_hints_put(struct objagg_hints *objagg_hints) +{ + if (--objagg_hints->refcount) + return; + objagg_hints_flush(objagg_hints); + rhashtable_destroy(&objagg_hints->node_ht); + kfree(objagg_hints); +} +EXPORT_SYMBOL(objagg_hints_put); + +/** + * objagg_hints_stats_get - obtains stats of the hints instance + * @objagg_hints: hints instance + * + * Note: all locking must be provided by the caller. + * + * The returned structure contains statistics of all objects + * currently in use, ordered by following rules: + * 1) Root objects are always on lower indexes than the rest. + * 2) Objects with higher delta user count are always on lower + * indexes. + * 3) In case multiple objects have the same delta user count, + * the objects are ordered by user count. + * + * Returns a pointer to stats instance in case of success, + * otherwise it returns pointer error using ERR_PTR macro. 
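And a sketch of how a caller might inspect the hints before committing to them, using the stats interface documented above (the max_roots policy is hypothetical):

static int hints_fit(struct objagg_hints *hints, unsigned int max_roots)
{
	const struct objagg_stats *stats;
	int err = 0;

	stats = objagg_hints_stats_get(hints);
	if (IS_ERR(stats))
		return PTR_ERR(stats);
	if (stats->root_count > max_roots)
		err = -ENOSPC;		/* more roots than we can afford */
	objagg_stats_put(stats);
	return err;
}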
+ */ +const struct objagg_stats * +objagg_hints_stats_get(struct objagg_hints *objagg_hints) +{ + struct objagg_stats *objagg_stats; + struct objagg_hints_node *hnode; + int i; + + objagg_stats = kzalloc(struct_size(objagg_stats, stats_info, + objagg_hints->node_count), + GFP_KERNEL); + if (!objagg_stats) + return ERR_PTR(-ENOMEM); + + i = 0; + list_for_each_entry(hnode, &objagg_hints->node_list, list) { + memcpy(&objagg_stats->stats_info[i], &hnode->stats_info, + sizeof(objagg_stats->stats_info[0])); + if (objagg_stats->stats_info[i].is_root) + objagg_stats->root_count++; + i++; + } + objagg_stats->stats_info_count = i; + + sort(objagg_stats->stats_info, objagg_stats->stats_info_count, + sizeof(struct objagg_obj_stats_info), + objagg_stats_info_sort_cmp_func, NULL); + + return objagg_stats; +} +EXPORT_SYMBOL(objagg_hints_stats_get); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>"); MODULE_DESCRIPTION("Object aggregation manager"); diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc index d5242f544551..b7c68030da4f 100644 --- a/lib/raid6/neon.uc +++ b/lib/raid6/neon.uc @@ -28,7 +28,6 @@ typedef uint8x16_t unative_t; -#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) #define NSIZE sizeof(unative_t) /* @@ -61,7 +60,7 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = disks - 3; /* Highest data disk */ p = dptr[z0+1]; /* XOR parity */ @@ -92,7 +91,7 @@ void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop, int d, z, z0; register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; - const unative_t x1d = NBYTES(0x1d); + const unative_t x1d = vdupq_n_u8(0x1d); z0 = stop; /* P/Q right side optimization */ p = dptr[disks-2]; /* XOR parity */ diff --git a/lib/raid6/recov_neon_inner.c b/lib/raid6/recov_neon_inner.c index 8cd20c9f834a..f13c07f82297 100644 --- a/lib/raid6/recov_neon_inner.c +++ b/lib/raid6/recov_neon_inner.c @@ -10,11 +10,6 @@ #include <arm_neon.h> -static const uint8x16_t x0f = { - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -}; - #ifdef CONFIG_ARM /* * AArch32 does not provide this intrinsic natively because it does not @@ -41,6 +36,7 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, uint8x16_t pm1 = vld1q_u8(pbmul + 16); uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while ( bytes-- ) { @@ -60,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, px = veorq_u8(vld1q_u8(p), vld1q_u8(dp)); vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); qx = veorq_u8(vx, vy); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4); + vy = vshrq_n_u8(px, 4); vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f)); - vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(pm1, vy); vx = veorq_u8(vx, vy); db = veorq_u8(vx, qx); @@ -87,6 +83,7 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, { uint8x16_t qm0 = vld1q_u8(qmul); uint8x16_t qm1 = vld1q_u8(qmul + 16); + uint8x16_t x0f = vdupq_n_u8(0x0f); /* * while (bytes--) { @@ -100,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t 
*q, uint8_t *dq, vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq)); - vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4); + vy = vshrq_n_u8(vx, 4); vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f)); - vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f)); + vy = vqtbl1q_u8(qm1, vy); vx = veorq_u8(vx, vy); vy = veorq_u8(vx, vld1q_u8(p)); diff --git a/lib/refcount.c b/lib/refcount.c index ebcf8cd49e05..6e904af0fb3e 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -33,6 +33,9 @@ * Note that the allocator is responsible for ordering things between free() * and alloc(). * + * The decrements dec_and_test() and sub_and_test() also provide acquire + * ordering on success. + * */ #include <linux/mutex.h> @@ -164,8 +167,8 @@ EXPORT_SYMBOL(refcount_inc_checked); * at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. + * before, and provides an acquire ordering on success such that free() + * must come after. * * Use of this function is not recommended for the normal reference counting * use case in which references are taken and released one at a time. In these @@ -190,7 +193,12 @@ bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) } while (!atomic_try_cmpxchg_release(&r->refs, &val, new)); - return !new; + if (!new) { + smp_acquire__after_ctrl_dep(); + return true; + } + return false; + } EXPORT_SYMBOL(refcount_sub_and_test_checked); @@ -202,8 +210,8 @@ EXPORT_SYMBOL(refcount_sub_and_test_checked); * decrement when saturated at UINT_MAX. * * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. + * before, and provides an acquire ordering on success such that free() + * must come after. * * Return: true if the resulting refcount is 0, false otherwise */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 852ffa5160f1..0a105d4af166 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -682,7 +682,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_enter); * rhashtable_walk_exit - Free an iterator * @iter: Hash table Iterator * - * This function frees resources allocated by rhashtable_walk_init. + * This function frees resources allocated by rhashtable_walk_enter. 
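For reference, the enter/exit pairing that the corrected kernel-doc above refers to is used like this (a sketch; the table and its contents are the caller's, and rhashtable_walk_next() may return ERR_PTR(-EAGAIN) while the table is resizing):

#include <linux/rhashtable.h>

static void walk_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	void *obj;

	rhashtable_walk_enter(ht, &iter);	/* has no return value */
	rhashtable_walk_start(&iter);
	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj))
			continue;		/* -EAGAIN: resize in flight, keep going */
		/* use obj here */
	}
	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}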
*/ void rhashtable_walk_exit(struct rhashtable_iter *iter) { diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 65c2d06250a6..5b382c1244ed 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -26,14 +26,10 @@ static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) { unsigned long mask, val; - unsigned long __maybe_unused flags; bool ret = false; + unsigned long flags; - /* Silence bogus lockdep warning */ -#if defined(CONFIG_LOCKDEP) - local_irq_save(flags); -#endif - spin_lock(&sb->map[index].swap_lock); + spin_lock_irqsave(&sb->map[index].swap_lock, flags); if (!sb->map[index].cleared) goto out_unlock; @@ -54,10 +50,7 @@ static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) ret = true; out_unlock: - spin_unlock(&sb->map[index].swap_lock); -#if defined(CONFIG_LOCKDEP) - local_irq_restore(flags); -#endif + spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); return ret; } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 9ba349e775ef..739dc9fe2c55 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -625,6 +625,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter) } EXPORT_SYMBOL(__sg_page_iter_next); +static int sg_dma_page_count(struct scatterlist *sg) +{ + return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT; +} + +bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter) +{ + struct sg_page_iter *piter = &dma_iter->base; + + if (!piter->__nents || !piter->sg) + return false; + + piter->sg_pgoffset += piter->__pg_advance; + piter->__pg_advance = 1; + + while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) { + piter->sg_pgoffset -= sg_dma_page_count(piter->sg); + piter->sg = sg_next(piter->sg); + if (!--piter->__nents || !piter->sg) + return false; + } + + return true; +} +EXPORT_SYMBOL(__sg_page_iter_dma_next); + /** * sg_miter_start - start mapping iteration over a sg list * @miter: sg mapping iter to be started diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 85925aaa4fff..157d9e31f6c2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,10 +5,11 @@ * DEBUG_PREEMPT variant of smp_processor_id(). 
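The DMA page iterator added to lib/scatterlist.c above can be driven directly like this (a sketch: that dma_map_sg() has already been done, and the use of __sg_page_iter_start() on the embedded base iterator, are this example's assumptions):

#include <linux/scatterlist.h>

static void walk_dma_pages(struct sg_table *sgt, int dma_nents)
{
	struct sg_dma_page_iter dma_iter;

	__sg_page_iter_start(&dma_iter.base, sgt->sgl, dma_nents, 0);
	while (__sg_page_iter_dma_next(&dma_iter)) {
		/* one PAGE_SIZE chunk of the mapping per step, measured
		 * with sg_dma_len() rather than the CPU-side sg length */
	}
}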
*/ #include <linux/export.h> +#include <linux/kprobes.h> #include <linux/sched.h> -notrace static unsigned int check_preemption_disabled(const char *what1, - const char *what2) +notrace static nokprobe_inline +unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -56,9 +57,11 @@ notrace unsigned int debug_smp_processor_id(void) return check_preemption_disabled("smp_processor_id", ""); } EXPORT_SYMBOL(debug_smp_processor_id); +NOKPROBE_SYMBOL(debug_smp_processor_id); notrace void __this_cpu_preempt_check(const char *op) { check_preemption_disabled("__this_cpu_", op); } EXPORT_SYMBOL(__this_cpu_preempt_check); +NOKPROBE_SYMBOL(__this_cpu_preempt_check); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f3e570722a7e..0845f635f404 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -6668,12 +6668,14 @@ static int __run_one(const struct bpf_prog *fp, const void *data, u64 start, finish; int ret = 0, i; + preempt_disable(); start = ktime_get_ns(); for (i = 0; i < runs; i++) ret = BPF_PROG_RUN(fp, data); finish = ktime_get_ns(); + preempt_enable(); *duration = finish - start; do_div(*duration, runs); diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 7cab9a9869ac..7222093ee00b 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -631,11 +631,6 @@ static ssize_t trigger_batched_requests_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - rc = -ENOMEM; - goto out_bail; - } req->fw = NULL; req->idx = i; req->name = test_fw_config->name; @@ -737,10 +732,6 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (!req) { - WARN_ON(1); - goto out_bail; - } req->name = test_fw_config->name; req->fw = NULL; req->idx = i; diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 51b78405bf24..7de2702621dc 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -480,29 +480,6 @@ static noinline void __init copy_user_test(void) kfree(kmem); } -static noinline void __init use_after_scope_test(void) -{ - volatile char *volatile p; - - pr_info("use-after-scope on int\n"); - { - int local = 0; - - p = (char *)&local; - } - p[0] = 1; - p[3] = 1; - - pr_info("use-after-scope on array\n"); - { - char local[1024] = {0}; - - p = local; - } - p[0] = 1; - p[1023] = 1; -} - static noinline void __init kasan_alloca_oob_left(void) { volatile int i = 10; @@ -682,7 +659,6 @@ static int __init kmalloc_tests_init(void) kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); - use_after_scope_test(); kmem_cache_double_free(); kmem_cache_invalid_free(); kasan_memchr(); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index d82d022111e0..9cf77628fc91 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config) config->test_driver = NULL; kfree_const(config->test_fs); - config->test_driver = NULL; + config->test_fs = NULL; } static void kmod_config_free(struct kmod_test_device *test_dev) diff --git a/lib/test_objagg.c b/lib/test_objagg.c index ab57144bb0cd..72c1abfa154d 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg, #define MAX_KEY_ID_DIFF 5 +static bool delta_check(void *priv, const void *parent_obj, const void *obj) +{ + const struct tokey *parent_key = parent_obj; + const struct 
tokey *key = obj; + int diff = key->id - parent_key->id; + + return diff >= 0 && diff <= MAX_KEY_ID_DIFF; +} + static void *delta_create(void *priv, void *parent_obj, void *obj) { struct tokey *parent_key = parent_obj; @@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj) int diff = key->id - parent_key->id; struct delta *delta; - if (diff < 0 || diff > MAX_KEY_ID_DIFF) + if (!delta_check(priv, parent_obj, obj)) return ERR_PTR(-EINVAL); delta = kzalloc(sizeof(*delta), GFP_KERNEL); @@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv) kfree(delta); } -static void *root_create(void *priv, void *obj) +static void *root_create(void *priv, void *obj, unsigned int id) { struct world *world = priv; struct tokey *key = obj; @@ -268,6 +277,12 @@ stats_put: return err; } +static bool delta_check_dummy(void *priv, const void *parent_obj, + const void *obj) +{ + return false; +} + static void *delta_create_dummy(void *priv, void *parent_obj, void *obj) { return ERR_PTR(-EOPNOTSUPP); @@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv) static const struct objagg_ops nodelta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check_dummy, .delta_create = delta_create_dummy, .delta_destroy = delta_destroy_dummy, .root_create = root_create, @@ -292,7 +308,7 @@ static int test_nodelta(void) int i; int err; - objagg = objagg_create(&nodelta_ops, &world); + objagg = objagg_create(&nodelta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -357,6 +373,7 @@ err_stats_second_zero: static const struct objagg_ops delta_ops = { .obj_size = sizeof(struct tokey), + .delta_check = delta_check, .delta_create = delta_create, .delta_destroy = delta_destroy, .root_create = root_create, @@ -728,8 +745,10 @@ static int check_expect_stats(struct objagg *objagg, int err; stats = objagg_stats_get(objagg); - if (IS_ERR(stats)) + if (IS_ERR(stats)) { + *errmsg = "objagg_stats_get() failed."; return PTR_ERR(stats); + } err = __check_expect_stats(stats, expect_stats, errmsg); objagg_stats_put(stats); return err; @@ -769,7 +788,6 @@ static int test_delta_action_item(struct world *world, if (err) goto errout; - errmsg = NULL; err = check_expect_stats(objagg, &action_item->expect_stats, &errmsg); if (err) { pr_err("Key %u: Stats: %s\n", action_item->key_id, errmsg); @@ -793,7 +811,7 @@ static int test_delta(void) int i; int err; - objagg = objagg_create(&delta_ops, &world); + objagg = objagg_create(&delta_ops, NULL, &world); if (IS_ERR(objagg)) return PTR_ERR(objagg); @@ -815,6 +833,170 @@ err_do_action_item: return err; } +struct hints_case { + const unsigned int *key_ids; + size_t key_ids_count; + struct expect_stats expect_stats; + struct expect_stats expect_stats_hints; +}; + +static const unsigned int hints_case_key_ids[] = { + 1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8, +}; + +static const struct hints_case hints_case = { + .key_ids = hints_case_key_ids, + .key_ids_count = ARRAY_SIZE(hints_case_key_ids), + .expect_stats = + EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(3, 2), + DELTA(5, 2), DELTA(6, 1)), + .expect_stats_hints = + EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1), + DELTA(8, 3), DELTA(5, 2), + DELTA(6, 1), DELTA(7, 1)), +}; + +static void __pr_debug_stats(const struct objagg_stats *stats) +{ + int i; + + for (i = 0; i < stats->stats_info_count; i++) + pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i, + obj_to_key_id(stats->stats_info[i].objagg_obj), + 
stats->stats_info[i].stats.user_count, + stats->stats_info[i].stats.delta_user_count, + stats->stats_info[i].is_root ? "root" : "noroot"); +} + +static void pr_debug_stats(struct objagg *objagg) +{ + const struct objagg_stats *stats; + + stats = objagg_stats_get(objagg); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static void pr_debug_hints_stats(struct objagg_hints *objagg_hints) +{ + const struct objagg_stats *stats; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return; + __pr_debug_stats(stats); + objagg_stats_put(stats); +} + +static int check_expect_hints_stats(struct objagg_hints *objagg_hints, + const struct expect_stats *expect_stats, + const char **errmsg) +{ + const struct objagg_stats *stats; + int err; + + stats = objagg_hints_stats_get(objagg_hints); + if (IS_ERR(stats)) + return PTR_ERR(stats); + err = __check_expect_stats(stats, expect_stats, errmsg); + objagg_stats_put(stats); + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world2 = {}; + struct world world = {}; + struct objagg *objagg2; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + pr_debug_hints_stats(hints); + err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Hints stats: %s\n", errmsg); + goto err_check_expect_hints_stats; + } + + objagg2 = objagg_create(&delta_ops, hints, &world2); + if (IS_ERR(objagg2)) + return PTR_ERR(objagg2); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world2, objagg2, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world2_obj_get; + } + } + + pr_debug_stats(objagg2); + err = check_expect_stats(objagg2, &hints_case->expect_stats_hints, + &errmsg); + if (err) { + pr_err("Stats2: %s\n", errmsg); + goto err_check_expect_stats2; + } + + err = 0; + +err_check_expect_stats2: +err_world2_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world2, objagg, hints_case->key_ids[i]); + objagg_hints_put(hints); + objagg_destroy(objagg2); + i = hints_case->key_ids_count; +err_check_expect_hints_stats: +err_hints_get: +err_check_expect_stats: +err_world_obj_get: + for (i--; i >= 0; i--) + world_obj_put(&world, objagg, hints_case->key_ids[i]); + + objagg_destroy(objagg); + return err; +} +static int test_hints(void) +{ + return test_hints_case(&hints_case); +} + static int __init test_objagg_init(void) { int err; @@ -822,7 +1004,10 @@ static int __init test_objagg_init(void) err = test_nodelta(); if (err) return err; - return test_delta(); + err = test_delta(); + if (err) + return err; + return test_hints(); } static void __exit test_objagg_exit(void) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 
6a8ac7626797..3bd2e91bfc29 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -177,16 +177,11 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array, static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) { - unsigned int err, total = 0, chain_len = 0; + unsigned int total = 0, chain_len = 0; struct rhashtable_iter hti; struct rhash_head *pos; - err = rhashtable_walk_init(ht, &hti, GFP_KERNEL); - if (err) { - pr_warn("Test failed: allocation error"); - return; - } - + rhashtable_walk_enter(ht, &hti); rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { @@ -395,7 +390,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element %d, error %d not %d", i, err, -ENOENT)) continue; } @@ -440,7 +435,7 @@ static int __init test_rhltable(unsigned int entries) if (WARN(err, "cannot remove element at slot %d", i)) continue; } else { - if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d", + if (WARN(err != -ENOENT, "removed non-existent element, error %d not %d", err, -ENOENT)) continue; } @@ -541,38 +536,45 @@ static unsigned int __init print_ht(struct rhltable *rhlt) static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, int cnt, bool slow) { - struct rhltable rhlt; + struct rhltable *rhlt; unsigned int i, ret; const char *key; int err = 0; - err = rhltable_init(&rhlt, &test_rht_params_dup); - if (WARN_ON(err)) + rhlt = kmalloc(sizeof(*rhlt), GFP_KERNEL); + if (WARN_ON(!rhlt)) + return -EINVAL; + + err = rhltable_init(rhlt, &test_rht_params_dup); + if (WARN_ON(err)) { + kfree(rhlt); return err; + } for (i = 0; i < cnt; i++) { rhl_test_objects[i].value.tid = i; - key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key = rht_obj(&rhlt->ht, &rhl_test_objects[i].list_node.rhead); key += test_rht_params_dup.key_offset; if (slow) { - err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + err = PTR_ERR(rhashtable_insert_slow(&rhlt->ht, key, &rhl_test_objects[i].list_node.rhead)); if (err == -EAGAIN) err = 0; } else - err = rhltable_insert(&rhlt, + err = rhltable_insert(rhlt, &rhl_test_objects[i].list_node, test_rht_params_dup); if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) goto skip_print; } - ret = print_ht(&rhlt); + ret = print_ht(rhlt); WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); skip_print: - rhltable_destroy(&rhlt); + rhltable_destroy(rhlt); + kfree(rhlt); return 0; } diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c new file mode 100644 index 000000000000..13115b6f2b88 --- /dev/null +++ b/lib/test_stackinit.c @@ -0,0 +1,378 @@ +// SPDX-Licenses: GPLv2 +/* + * Test cases for compiler-based stack variable zeroing via future + * compiler flags or CONFIG_GCC_PLUGIN_STRUCTLEAK*. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +/* Exfiltration buffer. */ +#define MAX_VAR_SIZE 128 +static char check_buf[MAX_VAR_SIZE]; + +/* Character array to trigger stack protector in all functions. */ +#define VAR_BUFFER 32 + +/* Volatile mask to convince compiler to copy memory with 0xff. 
*/ +static volatile u8 forced_mask = 0xff; + +/* Location and size tracking to validate fill and test are colocated. */ +static void *fill_start, *target_start; +static size_t fill_size, target_size; + +static bool range_contains(char *haystack_start, size_t haystack_size, + char *needle_start, size_t needle_size) +{ + if (needle_start >= haystack_start && + needle_start + needle_size <= haystack_start + haystack_size) + return true; + return false; +} + +#define DO_NOTHING_TYPE_SCALAR(var_type) var_type +#define DO_NOTHING_TYPE_STRING(var_type) void +#define DO_NOTHING_TYPE_STRUCT(var_type) void + +#define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) +#define DO_NOTHING_RETURN_STRING(ptr) /**/ +#define DO_NOTHING_RETURN_STRUCT(ptr) /**/ + +#define DO_NOTHING_CALL_SCALAR(var, name) \ + (var) = do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_STRING(var, name) \ + do_nothing_ ## name(var) +#define DO_NOTHING_CALL_STRUCT(var, name) \ + do_nothing_ ## name(&(var)) + +#define FETCH_ARG_SCALAR(var) &var +#define FETCH_ARG_STRING(var) var +#define FETCH_ARG_STRUCT(var) &var + +#define FILL_SIZE_STRING 16 + +#define INIT_CLONE_SCALAR /**/ +#define INIT_CLONE_STRING [FILL_SIZE_STRING] +#define INIT_CLONE_STRUCT /**/ + +#define INIT_SCALAR_none /**/ +#define INIT_SCALAR_zero = 0 + +#define INIT_STRING_none [FILL_SIZE_STRING] /**/ +#define INIT_STRING_zero [FILL_SIZE_STRING] = { } + +#define INIT_STRUCT_none /**/ +#define INIT_STRUCT_zero = { } +#define INIT_STRUCT_static_partial = { .two = 0, } +#define INIT_STRUCT_static_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_dynamic_partial = { .two = arg->two, } +#define INIT_STRUCT_dynamic_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_runtime_partial ; \ + var.two = 0 +#define INIT_STRUCT_runtime_all ; \ + var.one = 0; \ + var.two = 0; \ + var.three = 0; \ + memset(&var.four, 0, \ + sizeof(var.four)) + +/* + * @name: unique string name for the test + * @var_type: type to be tested for zeroing initialization + * @which: is this a SCALAR, STRING, or STRUCT type? + * @init_level: what kind of initialization is performed + */ +#define DEFINE_TEST_DRIVER(name, var_type, which) \ +/* Returns 0 on success, 1 on failure. */ \ +static noinline __init int test_ ## name (void) \ +{ \ + var_type zero INIT_CLONE_ ## which; \ + int ignored; \ + u8 sum = 0, i; \ + \ + /* Notice when a new test is larger than expected. */ \ + BUILD_BUG_ON(sizeof(zero) > MAX_VAR_SIZE); \ + \ + /* Fill clone type with zero for per-field init. */ \ + memset(&zero, 0x00, sizeof(zero)); \ + /* Fill stack with 0xFF. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 1, \ + FETCH_ARG_ ## which(zero)); \ + /* Clear entire check buffer for later bit tests. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ + /* Extract stack-defined variable contents. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 0, \ + FETCH_ARG_ ## which(zero)); \ + \ + /* Validate that compiler lined up fill and target. */ \ + if (!range_contains(fill_start, fill_size, \ + target_start, target_size)) { \ + pr_err(#name ": stack fill missed target!?\n"); \ + pr_err(#name ": fill %zu wide\n", fill_size); \ + pr_err(#name ": target offset by %d\n", \ + (int)((ssize_t)(uintptr_t)fill_start - \ + (ssize_t)(uintptr_t)target_start)); \ + return 1; \ + } \ + \ + /* Look for any set bits in the check region. 
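Each INIT_* fragment above is pasted directly after a variable declarator by the test-generation macros that follow, with every variant landing in its own generated leaf function, so written out by hand the declarations look like:

	/* scalar, "zero" -> */	u64 var = 0;
	/* scalar, "none" -> */	u64 var;
	/* string, "zero" -> */	unsigned char var[16] = { };	/* FILL_SIZE_STRING is 16 */
	/* string, "none" -> */	unsigned char var[16];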
*/ \ + for (i = 0; i < sizeof(check_buf); i++) \ + sum += (check_buf[i] != 0); \ + \ + if (sum == 0) \ + pr_info(#name " ok\n"); \ + else \ + pr_warn(#name " FAIL (uninit bytes: %d)\n", \ + sum); \ + \ + return (sum != 0); \ +} +#define DEFINE_TEST(name, var_type, which, init_level) \ +/* no-op to force compiler into ignoring "uninitialized" vars */\ +static noinline __init DO_NOTHING_TYPE_ ## which(var_type) \ +do_nothing_ ## name(var_type *ptr) \ +{ \ + /* Will always be true, but compiler doesn't know. */ \ + if ((unsigned long)ptr > 0x2) \ + return DO_NOTHING_RETURN_ ## which(ptr); \ + else \ + return DO_NOTHING_RETURN_ ## which(ptr + 1); \ +} \ +static noinline __init int leaf_ ## name(unsigned long sp, \ + bool fill, \ + var_type *arg) \ +{ \ + char buf[VAR_BUFFER]; \ + var_type var INIT_ ## which ## _ ## init_level; \ + \ + target_start = &var; \ + target_size = sizeof(var); \ + /* \ + * Keep this buffer around to make sure we've got a \ + * stack frame of SOME kind... \ + */ \ + memset(buf, (char)(sp && 0xff), sizeof(buf)); \ + /* Fill variable with 0xFF. */ \ + if (fill) { \ + fill_start = &var; \ + fill_size = sizeof(var); \ + memset(fill_start, \ + (char)((sp && 0xff) | forced_mask), \ + fill_size); \ + } \ + \ + /* Silence "never initialized" warnings. */ \ + DO_NOTHING_CALL_ ## which(var, name); \ + \ + /* Exfiltrate "var". */ \ + memcpy(check_buf, target_start, target_size); \ + \ + return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ +} \ +DEFINE_TEST_DRIVER(name, var_type, which) + +/* Structure with no padding. */ +struct test_packed { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Simple structure with padding likely to be covered by compiler. */ +struct test_small_hole { + size_t one; + char two; + /* 3 byte padding hole here. */ + int three; + unsigned long four; +}; + +/* Try to trigger unhandled padding in a structure. */ +struct test_aligned { + u32 internal1; + u64 internal2; +} __aligned(64); + +struct test_big_hole { + u8 one; + u8 two; + u8 three; + /* 61 byte padding hole here. */ + struct test_aligned four; +} __aligned(64); + +struct test_trailing_hole { + char *one; + char *two; + char *three; + char four; + /* "sizeof(unsigned long) - 1" byte padding hole here. */ +}; + +/* Test if STRUCTLEAK is clearing structs with __user fields. */ +struct test_user { + u8 one; + unsigned long two; + char __user *three; + unsigned long four; +}; + +#define DEFINE_SCALAR_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, name, SCALAR, init) + +#define DEFINE_SCALAR_TESTS(init) \ + DEFINE_SCALAR_TEST(u8, init); \ + DEFINE_SCALAR_TEST(u16, init); \ + DEFINE_SCALAR_TEST(u32, init); \ + DEFINE_SCALAR_TEST(u64, init); \ + DEFINE_TEST(char_array_ ## init, unsigned char, STRING, init) + +#define DEFINE_STRUCT_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, \ + struct test_ ## name, STRUCT, init) + +#define DEFINE_STRUCT_TESTS(init) \ + DEFINE_STRUCT_TEST(small_hole, init); \ + DEFINE_STRUCT_TEST(big_hole, init); \ + DEFINE_STRUCT_TEST(trailing_hole, init); \ + DEFINE_STRUCT_TEST(packed, init) + +/* These should be fully initialized all the time! */ +DEFINE_SCALAR_TESTS(zero); +DEFINE_STRUCT_TESTS(zero); +/* Static initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(static_partial); +DEFINE_STRUCT_TESTS(static_all); +/* Dynamic initialization: padding may be left uninitialized. 
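The *_hole structures above exist because initializing members says little about the padding between them; a userspace-style illustration of the same point (the layout comment assumes a typical 64-bit ABI, which is not guaranteed):

#include <string.h>

struct small_hole {
	unsigned long one;
	char two;	/* a few bytes of padding usually follow here */
	int three;
	unsigned long four;
};

void snapshot(unsigned char *out)
{
	struct small_hole var = { .two = 0 };	/* every member is zero... */

	memcpy(out, &var, sizeof(var));
	/* ...but whether the padding bytes copied into out[] are zero
	 * depends on the compiler and the initialization style, which is
	 * exactly what the static/dynamic/runtime variants probe. */
}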
*/ +DEFINE_STRUCT_TESTS(dynamic_partial); +DEFINE_STRUCT_TESTS(dynamic_all); +/* Runtime initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(runtime_partial); +DEFINE_STRUCT_TESTS(runtime_all); +/* No initialization without compiler instrumentation. */ +DEFINE_SCALAR_TESTS(none); +DEFINE_STRUCT_TESTS(none); +DEFINE_TEST(user, struct test_user, STRUCT, none); + +/* + * Check two uses through a variable declaration outside either path, + * which was noticed as a special case in porting earlier stack init + * compiler logic. + */ +static int noinline __leaf_switch_none(int path, bool fill) +{ + switch (path) { + uint64_t var; + + case 1: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0x55, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + case 2: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0xaa, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + default: + var = 5; + return var & forced_mask; + } + return 0; +} + +static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(1, fill); +} + +static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(2, fill); +} + +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); + +static int __init test_stackinit_init(void) +{ + unsigned int failures = 0; + +#define test_scalars(init) do { \ + failures += test_u8_ ## init (); \ + failures += test_u16_ ## init (); \ + failures += test_u32_ ## init (); \ + failures += test_u64_ ## init (); \ + failures += test_char_array_ ## init (); \ + } while (0) + +#define test_structs(init) do { \ + failures += test_small_hole_ ## init (); \ + failures += test_big_hole_ ## init (); \ + failures += test_trailing_hole_ ## init (); \ + failures += test_packed_ ## init (); \ + } while (0) + + /* These are explicitly initialized and should always pass. */ + test_scalars(zero); + test_structs(zero); + /* Padding here appears to be accidentally always initialized? */ + test_structs(dynamic_partial); + /* Padding initialization depends on compiler behaviors. */ + test_structs(static_partial); + test_structs(static_all); + test_structs(dynamic_all); + test_structs(runtime_partial); + test_structs(runtime_all); + + /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ + test_scalars(none); + failures += test_switch_1_none(); + failures += test_switch_2_none(); + + /* STRUCTLEAK_BYREF should cover from here down. */ + test_structs(none); + + /* STRUCTLEAK will only cover this. */ + failures += test_user(); + + if (failures == 0) + pr_info("all tests passed!\n"); + else + pr_err("failures: %u\n", failures); + + return failures ? 
-EINVAL : 0; +} +module_init(test_stackinit_init); + +static void __exit test_stackinit_exit(void) +{ } +module_exit(test_stackinit_exit); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c index 280f4979d00e..9ea10adf7a66 100644 --- a/lib/test_ubsan.c +++ b/lib/test_ubsan.c @@ -42,14 +42,6 @@ static void test_ubsan_divrem_overflow(void) val /= val2; } -static void test_ubsan_vla_bound_not_positive(void) -{ - volatile int size = -1; - char buf[size]; - - (void)buf; -} - static void test_ubsan_shift_out_of_bounds(void) { volatile int val = -1; @@ -61,7 +53,7 @@ static void test_ubsan_shift_out_of_bounds(void) static void test_ubsan_out_of_bounds(void) { volatile int i = 4, j = 5; - volatile int arr[i]; + volatile int arr[4]; arr[j] = i; } @@ -113,7 +105,6 @@ static const test_ubsan_fp test_ubsan_array[] = { test_ubsan_mul_overflow, test_ubsan_negate_overflow, test_ubsan_divrem_overflow, - test_ubsan_vla_bound_not_positive, test_ubsan_shift_out_of_bounds, test_ubsan_out_of_bounds, test_ubsan_load_invalid_value, diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c new file mode 100644 index 000000000000..83cdcaa82bf6 --- /dev/null +++ b/lib/test_vmalloc.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for stress and analyze performance of vmalloc allocator. + * (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/random.h> +#include <linux/kthread.h> +#include <linux/moduleparam.h> +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/rwsem.h> +#include <linux/mm.h> + +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg) \ + +__param(bool, single_cpu_test, false, + "Use single first online CPU to run tests"); + +__param(bool, sequential_test_order, false, + "Use sequential stress tests order"); + +__param(int, test_repeat_count, 1, + "Set test repeat counter"); + +__param(int, test_loop_count, 1000000, + "Set test loop counter"); + +__param(int, run_test_mask, INT_MAX, + "Set tests specified in the mask.\n\n" + "\t\tid: 1, name: fix_size_alloc_test\n" + "\t\tid: 2, name: full_fit_alloc_test\n" + "\t\tid: 4, name: long_busy_list_alloc_test\n" + "\t\tid: 8, name: random_size_alloc_test\n" + "\t\tid: 16, name: fix_align_alloc_test\n" + "\t\tid: 32, name: random_size_align_alloc_test\n" + "\t\tid: 64, name: align_shift_alloc_test\n" + "\t\tid: 128, name: pcpu_alloc_test\n" + /* Add a new test case description here. */ +); + +/* + * Depends on single_cpu_test parameter. If it is true, then + * use first online CPU to trigger a test on, otherwise go with + * all online CPUs. + */ +static cpumask_t cpus_run_test_mask = CPU_MASK_NONE; + +/* + * Read write semaphore for synchronization of setup + * phase that is done in main thread and workers. + */ +static DECLARE_RWSEM(prepare_for_test_rwsem); + +/* + * Completion tracking for worker threads. + */ +static DECLARE_COMPLETION(test_all_done_comp); +static atomic_t test_n_undone = ATOMIC_INIT(0); + +static inline void +test_report_one_done(void) +{ + if (atomic_dec_and_test(&test_n_undone)) + complete(&test_all_done_comp); +} + +static int random_size_align_alloc_test(void) +{ + unsigned long size, align, rnd; + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* + * Maximum 1024 pages, if PAGE_SIZE is 4096. 
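+ * (rnd % 23 gives exponents 0..22, so the requested alignment ranges
+ * from 1 byte up to 2^22 bytes, i.e. 4 MiB == 1024 pages of 4 KiB.)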
+ */ + align = 1 << (rnd % 23); + + /* + * Maximum 10 pages. + */ + size = ((rnd % 10) + 1) * PAGE_SIZE; + + ptr = __vmalloc_node_range(size, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +/* + * This test case is supposed to be failed. + */ +static int align_shift_alloc_test(void) +{ + unsigned long align; + void *ptr; + int i; + + for (i = 0; i < BITS_PER_LONG; i++) { + align = ((unsigned long) 1) << i; + + ptr = __vmalloc_node_range(PAGE_SIZE, align, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int fix_align_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = __vmalloc_node_range(5 * PAGE_SIZE, + THREAD_ALIGN << 1, + VMALLOC_START, VMALLOC_END, + GFP_KERNEL | __GFP_ZERO, + PAGE_KERNEL, + 0, 0, __builtin_return_address(0)); + + if (!ptr) + return -1; + + vfree(ptr); + } + + return 0; +} + +static int random_size_alloc_test(void) +{ + unsigned int n; + void *p; + int i; + + for (i = 0; i < test_loop_count; i++) { + get_random_bytes(&n, sizeof(i)); + n = (n % 100) + 1; + + p = vmalloc(n * PAGE_SIZE); + + if (!p) + return -1; + + *((__u8 *)p) = 1; + vfree(p); + } + + return 0; +} + +static int long_busy_list_alloc_test(void) +{ + void *ptr_1, *ptr_2; + void **ptr; + int rv = -1; + int i; + + ptr = vmalloc(sizeof(void *) * 15000); + if (!ptr) + return rv; + + for (i = 0; i < 15000; i++) + ptr[i] = vmalloc(1 * PAGE_SIZE); + + for (i = 0; i < test_loop_count; i++) { + ptr_1 = vmalloc(100 * PAGE_SIZE); + if (!ptr_1) + goto leave; + + ptr_2 = vmalloc(1 * PAGE_SIZE); + if (!ptr_2) { + vfree(ptr_1); + goto leave; + } + + *((__u8 *)ptr_1) = 0; + *((__u8 *)ptr_2) = 1; + + vfree(ptr_1); + vfree(ptr_2); + } + + /* Success */ + rv = 0; + +leave: + for (i = 0; i < 15000; i++) + vfree(ptr[i]); + + vfree(ptr); + return rv; +} + +static int full_fit_alloc_test(void) +{ + void **ptr, **junk_ptr, *tmp; + int junk_length; + int rv = -1; + int i; + + junk_length = fls(num_online_cpus()); + junk_length *= (32 * 1024 * 1024 / PAGE_SIZE); + + ptr = vmalloc(sizeof(void *) * junk_length); + if (!ptr) + return rv; + + junk_ptr = vmalloc(sizeof(void *) * junk_length); + if (!junk_ptr) { + vfree(ptr); + return rv; + } + + for (i = 0; i < junk_length; i++) { + ptr[i] = vmalloc(1 * PAGE_SIZE); + junk_ptr[i] = vmalloc(1 * PAGE_SIZE); + } + + for (i = 0; i < junk_length; i++) + vfree(junk_ptr[i]); + + for (i = 0; i < test_loop_count; i++) { + tmp = vmalloc(1 * PAGE_SIZE); + + if (!tmp) + goto error; + + *((__u8 *)tmp) = 1; + vfree(tmp); + } + + /* Success */ + rv = 0; + +error: + for (i = 0; i < junk_length; i++) + vfree(ptr[i]); + + vfree(ptr); + vfree(junk_ptr); + + return rv; +} + +static int fix_size_alloc_test(void) +{ + void *ptr; + int i; + + for (i = 0; i < test_loop_count; i++) { + ptr = vmalloc(3 * PAGE_SIZE); + + if (!ptr) + return -1; + + *((__u8 *)ptr) = 0; + + vfree(ptr); + } + + return 0; +} + +static int +pcpu_alloc_test(void) +{ + int rv = 0; +#ifndef CONFIG_NEED_PER_CPU_KM + void __percpu **pcpu; + size_t size, align; + int i; + + pcpu = vmalloc(sizeof(void __percpu *) * 35000); + if (!pcpu) + return -1; + + for (i = 0; i < 35000; i++) { + unsigned int r; + + get_random_bytes(&r, sizeof(i)); + size = (r % (PAGE_SIZE / 4)) + 1; + + /* + * Maximum PAGE_SIZE + */ + get_random_bytes(&r, 
sizeof(i)); + align = 1 << ((i % 11) + 1); + + pcpu[i] = __alloc_percpu(size, align); + if (!pcpu[i]) + rv = -1; + } + + for (i = 0; i < 35000; i++) + free_percpu(pcpu[i]); + + vfree(pcpu); +#endif + return rv; +} + +struct test_case_desc { + const char *test_name; + int (*test_func)(void); +}; + +static struct test_case_desc test_case_array[] = { + { "fix_size_alloc_test", fix_size_alloc_test }, + { "full_fit_alloc_test", full_fit_alloc_test }, + { "long_busy_list_alloc_test", long_busy_list_alloc_test }, + { "random_size_alloc_test", random_size_alloc_test }, + { "fix_align_alloc_test", fix_align_alloc_test }, + { "random_size_align_alloc_test", random_size_align_alloc_test }, + { "align_shift_alloc_test", align_shift_alloc_test }, + { "pcpu_alloc_test", pcpu_alloc_test }, + /* Add a new test case here. */ +}; + +struct test_case_data { + int test_failed; + int test_passed; + u64 time; +}; + +/* Split it to get rid of: WARNING: line over 80 characters */ +static struct test_case_data + per_cpu_test_data[NR_CPUS][ARRAY_SIZE(test_case_array)]; + +static struct test_driver { + struct task_struct *task; + unsigned long start; + unsigned long stop; + int cpu; +} per_cpu_test_driver[NR_CPUS]; + +static void shuffle_array(int *arr, int n) +{ + unsigned int rnd; + int i, j, x; + + for (i = n - 1; i > 0; i--) { + get_random_bytes(&rnd, sizeof(rnd)); + + /* Cut the range. */ + j = rnd % i; + + /* Swap indexes. */ + x = arr[i]; + arr[i] = arr[j]; + arr[j] = x; + } +} + +static int test_func(void *private) +{ + struct test_driver *t = private; + cpumask_t newmask = CPU_MASK_NONE; + int random_array[ARRAY_SIZE(test_case_array)]; + int index, i, j, ret; + ktime_t kt; + u64 delta; + + cpumask_set_cpu(t->cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) + random_array[i] = i; + + if (!sequential_test_order) + shuffle_array(random_array, ARRAY_SIZE(test_case_array)); + + /* + * Block until initialization is done. + */ + down_read(&prepare_for_test_rwsem); + + t->start = get_cycles(); + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + index = random_array[i]; + + /* + * Skip tests if run_test_mask has been specified. + */ + if (!((run_test_mask & (1 << index)) >> index)) + continue; + + kt = ktime_get(); + for (j = 0; j < test_repeat_count; j++) { + ret = test_case_array[index].test_func(); + if (!ret) + per_cpu_test_data[t->cpu][index].test_passed++; + else + per_cpu_test_data[t->cpu][index].test_failed++; + } + + /* + * Take an average time that test took. + */ + delta = (u64) ktime_us_delta(ktime_get(), kt); + do_div(delta, (u32) test_repeat_count); + + per_cpu_test_data[t->cpu][index].time = delta; + } + t->stop = get_cycles(); + + up_read(&prepare_for_test_rwsem); + test_report_one_done(); + + /* + * Wait for the kthread_stop() call. + */ + while (!kthread_should_stop()) + msleep(10); + + return 0; +} + +static void +init_test_configurtion(void) +{ + /* + * Reset all data of all CPUs. + */ + memset(per_cpu_test_data, 0, sizeof(per_cpu_test_data)); + + if (single_cpu_test) + cpumask_set_cpu(cpumask_first(cpu_online_mask), + &cpus_run_test_mask); + else + cpumask_and(&cpus_run_test_mask, cpu_online_mask, + cpu_online_mask); + + if (test_repeat_count <= 0) + test_repeat_count = 1; + + if (test_loop_count <= 0) + test_loop_count = 1; +} + +static void do_concurrent_test(void) +{ + int cpu, ret; + + /* + * Set some basic configurations plus sanity check. + */ + init_test_configurtion(); + + /* + * Put on hold all workers. 
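+ * Holding prepare_for_test_rwsem for write keeps every test_func()
+ * worker parked in down_read() until all kthreads have been spawned
+ * and up_write() is called below.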
+ */ + down_write(&prepare_for_test_rwsem); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + + t->cpu = cpu; + t->task = kthread_run(test_func, t, "vmalloc_test/%d", cpu); + + if (!IS_ERR(t->task)) + /* Success. */ + atomic_inc(&test_n_undone); + else + pr_err("Failed to start kthread for %d CPU\n", cpu); + } + + /* + * Now let the workers do their job. + */ + up_write(&prepare_for_test_rwsem); + + /* + * Sleep quiet until all workers are done with 1 second + * interval. Since the test can take a lot of time we + * can run into a stack trace of the hung task. That is + * why we go with completion_timeout and HZ value. + */ + do { + ret = wait_for_completion_timeout(&test_all_done_comp, HZ); + } while (!ret); + + for_each_cpu(cpu, &cpus_run_test_mask) { + struct test_driver *t = &per_cpu_test_driver[cpu]; + int i; + + if (!IS_ERR(t->task)) + kthread_stop(t->task); + + for (i = 0; i < ARRAY_SIZE(test_case_array); i++) { + if (!((run_test_mask & (1 << i)) >> i)) + continue; + + pr_info( + "Summary: %s passed: %d failed: %d repeat: %d loops: %d avg: %llu usec\n", + test_case_array[i].test_name, + per_cpu_test_data[cpu][i].test_passed, + per_cpu_test_data[cpu][i].test_failed, + test_repeat_count, test_loop_count, + per_cpu_test_data[cpu][i].time); + } + + pr_info("All test took CPU%d=%lu cycles\n", + cpu, t->stop - t->start); + } +} + +static int vmalloc_test_init(void) +{ + do_concurrent_test(); + return -EAGAIN; /* Fail will directly unload the module */ +} + +static void vmalloc_test_exit(void) +{ +} + +module_init(vmalloc_test_init) +module_exit(vmalloc_test_exit) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Uladzislau Rezki"); +MODULE_DESCRIPTION("vmalloc test module"); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 4676c0a1eeca..5d4bad8bd96a 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -40,9 +40,9 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - u32 id = 0; + u32 id; - XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index), + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(index), xa_limit_32b, gfp) != 0); XA_BUG_ON(xa, id != index); } @@ -107,8 +107,11 @@ static noinline void check_xas_retry(struct xarray *xa) XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); XA_BUG_ON(xa, xas.xa_node != NULL); + rcu_read_unlock(); XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + + rcu_read_lock(); XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas))); xas.xa_node = XAS_RESTART; XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); @@ -199,7 +202,7 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL)); xa_set_mark(xa, index + 1, XA_MARK_0); XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL)); - xa_set_mark(xa, index + 2, XA_MARK_1); + xa_set_mark(xa, index + 2, XA_MARK_2); XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL)); xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL); @@ -209,8 +212,8 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) void *entry; XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); - XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1)); - XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2)); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_1)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_2)); /* We should see two 
elements in the array */ rcu_read_lock(); @@ -343,7 +346,7 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EBUSY); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); @@ -357,45 +360,66 @@ static noinline void check_cmpxchg(struct xarray *xa) static noinline void check_reserve(struct xarray *xa) { void *entry; - unsigned long index = 0; + unsigned long index; + int count; /* An array with a reserved entry is not empty */ XA_BUG_ON(xa, !xa_empty(xa)); - xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_empty(xa)); XA_BUG_ON(xa, xa_load(xa, 12345678)); xa_release(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); /* Releasing a used entry does nothing */ - xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); xa_release(xa, 12345678); xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); - /* cmpxchg sees a reserved entry as NULL */ - xa_reserve(xa, 12345678, GFP_KERNEL); - XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678), - GFP_NOWAIT) != NULL); + /* cmpxchg sees a reserved entry as ZERO */ + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY, + xa_mk_value(12345678), GFP_NOWAIT) != NULL); xa_release(xa, 12345678); xa_erase_index(xa, 12345678); XA_BUG_ON(xa, !xa_empty(xa)); - /* And so does xa_insert */ - xa_reserve(xa, 12345678, GFP_KERNEL); - XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != 0); - xa_erase_index(xa, 12345678); + /* xa_insert treats it as busy */ + XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != + -EBUSY); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL); XA_BUG_ON(xa, !xa_empty(xa)); /* Can iterate through a reserved entry */ xa_store_index(xa, 5, GFP_KERNEL); - xa_reserve(xa, 6, GFP_KERNEL); + XA_BUG_ON(xa, xa_reserve(xa, 6, GFP_KERNEL) != 0); xa_store_index(xa, 7, GFP_KERNEL); - xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + count = 0; + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, index != 5 && index != 7); + count++; } + XA_BUG_ON(xa, count != 2); + + /* If we free a reserved entry, we should be able to allocate it */ + if (xa->xa_flags & XA_FLAGS_ALLOC) { + u32 id; + + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(8), + XA_LIMIT(5, 10), GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 8); + + xa_release(xa, 6); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(6), + XA_LIMIT(5, 10), GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 6); + } + xa_destroy(xa); } @@ -584,64 +608,194 @@ static noinline void check_multi_store(struct xarray *xa) #endif } -static DEFINE_XARRAY_ALLOC(xa0); - -static noinline void check_xa_alloc(void) +static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base) { int i; u32 id; - /* An empty array should assign 0 to the first alloc */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + /* An empty array should assign %base to the first alloc */ + 
xa_alloc_index(xa, base, GFP_KERNEL); /* Erasing it should make the array empty again */ - xa_erase_index(&xa0, 0); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + xa_erase_index(xa, base); + XA_BUG_ON(xa, !xa_empty(xa)); - /* And it should assign 0 again */ - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); - /* The next assigned ID should be 1 */ - xa_alloc_index(&xa0, 1, GFP_KERNEL); - xa_erase_index(&xa0, 1); + /* Allocating and then erasing a lot should not lose base */ + for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++) + xa_alloc_index(xa, i, GFP_KERNEL); + for (i = base; i < 2 * XA_CHUNK_SIZE; i++) + xa_erase_index(xa, i); + xa_alloc_index(xa, base, GFP_KERNEL); + + /* Destroying the array should do the same as erasing */ + xa_destroy(xa); + + /* And it should assign %base again */ + xa_alloc_index(xa, base, GFP_KERNEL); + + /* The next assigned ID should be base+1 */ + xa_alloc_index(xa, base + 1, GFP_KERNEL); + xa_erase_index(xa, base + 1); /* Storing a value should mark it used */ - xa_store_index(&xa0, 1, GFP_KERNEL); - xa_alloc_index(&xa0, 2, GFP_KERNEL); + xa_store_index(xa, base + 1, GFP_KERNEL); + xa_alloc_index(xa, base + 2, GFP_KERNEL); - /* If we then erase 0, it should be free */ - xa_erase_index(&xa0, 0); - xa_alloc_index(&xa0, 0, GFP_KERNEL); + /* If we then erase base, it should be free */ + xa_erase_index(xa, base); + xa_alloc_index(xa, base, GFP_KERNEL); - xa_erase_index(&xa0, 1); - xa_erase_index(&xa0, 2); + xa_erase_index(xa, base + 1); + xa_erase_index(xa, base + 2); for (i = 1; i < 5000; i++) { - xa_alloc_index(&xa0, i, GFP_KERNEL); + xa_alloc_index(xa, base + i, GFP_KERNEL); } - xa_destroy(&xa0); + xa_destroy(xa); - id = 0xfffffffeU; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + /* Check that we fail properly at the limit of allocation */ + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX - 1), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xfffffffeU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), + XA_BUG_ON(xa, id != 0xfffffffeU); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, id != 0xffffffffU); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, id != 0xffffffffU); - xa_destroy(&xa0); - - id = 10; - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0); - XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id), - GFP_KERNEL) != -ENOSPC); - xa_erase_index(&xa0, 3); - XA_BUG_ON(&xa0, !xa_empty(&xa0)); + XA_BUG_ON(xa, id != 0xffffffffU); + id = 3; + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(0), + XA_LIMIT(UINT_MAX - 1, UINT_MAX), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, id != 3); + xa_destroy(xa); + + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5), + GFP_KERNEL) != -EBUSY); + xa_erase_index(xa, 3); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base) +{ + unsigned int i, id; + unsigned long index; + void *entry; + + /* Allocate and free a NULL and check xa_empty() behaves */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 
0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, id) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Ditto, but check destroy instead of erase */ + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = base; i < base + 10; i++) { + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, + GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != i); + } + + XA_BUG_ON(xa, xa_store(xa, 3, xa_mk_index(3), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, xa_mk_index(4), GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store(xa, 4, NULL, GFP_KERNEL) != xa_mk_index(4)); + XA_BUG_ON(xa, xa_erase(xa, 5) != NULL); + XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 5); + + xa_for_each(xa, index, entry) { + xa_erase_index(xa, index); + } + + for (i = base; i < base + 9; i++) { + XA_BUG_ON(xa, xa_erase(xa, i) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + } + XA_BUG_ON(xa, xa_erase(xa, 8) != NULL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_erase(xa, base + 9) != NULL); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_destroy(xa); +} + +static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base) +{ + struct xa_limit limit = XA_LIMIT(1, 0x3fff); + u32 next = 0; + unsigned int i, id; + unsigned long index; + void *entry; + + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(1), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 1); + + next = 0x3ffd; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit, + &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != 0x3ffd); + xa_erase_index(xa, 0x3ffd); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0x3ffe; i < 0x4003; i++) { + if (i < 0x4000) + entry = xa_mk_index(i); + else + entry = xa_mk_index(i - 0x3fff); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, entry, limit, + &next, GFP_KERNEL) != (id == 1)); + XA_BUG_ON(xa, xa_mk_index(id) != entry); + } + + /* Check wrap-around is handled correctly */ + if (base != 0) + xa_erase_index(xa, base); + xa_erase_index(xa, base + 1); + next = UINT_MAX; + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != UINT_MAX); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base), + xa_limit_32b, &next, GFP_KERNEL) != 1); + XA_BUG_ON(xa, id != base); + XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base + 1), + xa_limit_32b, &next, GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != base + 1); + + xa_for_each(xa, index, entry) + xa_erase_index(xa, index); + + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static DEFINE_XARRAY_ALLOC(xa0); +static DEFINE_XARRAY_ALLOC1(xa1); + +static noinline void check_xa_alloc(void) +{ + check_xa_alloc_1(&xa0, 0); + check_xa_alloc_1(&xa1, 1); + check_xa_alloc_2(&xa0, 0); + check_xa_alloc_2(&xa1, 1); + check_xa_alloc_3(&xa0, 0); + check_xa_alloc_3(&xa1, 1); } static noinline void __check_store_iter(struct xarray *xa, unsigned long start, @@ -812,17 +966,16 @@ static noinline void check_find_1(struct xarray *xa) static noinline void check_find_2(struct xarray *xa) { void *entry; - unsigned long i, j, index = 0; + unsigned long i, j, index; - xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, true); } for (i = 0; i < 1024; i++) { xa_store_index(xa, index, GFP_KERNEL); j = 0; - index = 0; - xa_for_each(xa, entry, 
index, ULONG_MAX, XA_PRESENT) { + xa_for_each(xa, index, entry) { XA_BUG_ON(xa, xa_mk_index(index) != entry); XA_BUG_ON(xa, index != j++); } @@ -839,6 +992,7 @@ static noinline void check_find_3(struct xarray *xa) for (i = 0; i < 100; i++) { for (j = 0; j < 100; j++) { + rcu_read_lock(); for (k = 0; k < 100; k++) { xas_set(&xas, j); xas_for_each_marked(&xas, entry, k, XA_MARK_0) @@ -847,6 +1001,7 @@ static noinline void check_find_3(struct xarray *xa) XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); } + rcu_read_unlock(); } xa_store_index(xa, i, GFP_KERNEL); xa_set_mark(xa, i, XA_MARK_0); @@ -1183,6 +1338,58 @@ static noinline void check_store_range(struct xarray *xa) } } +static void check_align_1(struct xarray *xa, char *name) +{ + int i; + unsigned int id; + unsigned long index; + void *entry; + + for (i = 0; i < 8; i++) { + XA_BUG_ON(xa, xa_alloc(xa, &id, name + i, xa_limit_32b, + GFP_KERNEL) != 0); + XA_BUG_ON(xa, id != i); + } + xa_for_each(xa, index, entry) + XA_BUG_ON(xa, xa_is_err(entry)); + xa_destroy(xa); +} + +/* + * We should always be able to store without allocating memory after + * reserving a slot. + */ +static void check_align_2(struct xarray *xa, char *name) +{ + int i; + + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0; i < 8; i++) { + XA_BUG_ON(xa, xa_store(xa, 0, name + i, GFP_KERNEL) != NULL); + xa_erase(xa, 0); + } + + for (i = 0; i < 8; i++) { + XA_BUG_ON(xa, xa_reserve(xa, 0, GFP_KERNEL) != 0); + XA_BUG_ON(xa, xa_store(xa, 0, name + i, 0) != NULL); + xa_erase(xa, 0); + } + + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_align(struct xarray *xa) +{ + char name[] = "Motorola 68000"; + + check_align_1(xa, name); + check_align_1(xa, name + 1); + check_align_1(xa, name + 2); + check_align_1(xa, name + 3); + check_align_2(xa, name); +} + static LIST_HEAD(shadow_nodes); static void test_update_node(struct xa_node *node) @@ -1322,6 +1529,7 @@ static int xarray_checks(void) check_xas_erase(&array); check_cmpxchg(&array); check_reserve(&array); + check_reserve(&xa0); check_multi_store(&array); check_xa_alloc(); check_find(&array); @@ -1332,6 +1540,7 @@ static int xarray_checks(void) check_create_range(&array); check_store_range(&array); check_store_iter(&array); + check_align(&xa0); check_workingset(&array, 0); check_workingset(&array, 64); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3add92329bae..791b6fa36905 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,6 +17,7 @@ */ #include <stdarg.h> +#include <linux/build_bug.h> #include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/module.h> /* for KSYM_SYMBOL_LEN */ @@ -405,6 +406,8 @@ struct printf_spec { unsigned int base:8; /* number base, 8, 10 or 16 only */ signed int precision:16; /* # of digits/chars */ } __packed; +static_assert(sizeof(struct printf_spec) == 8); + #define FIELD_WIDTH_MAX ((1 << 23) - 1) #define PRECISION_MAX ((1 << 15) - 1) @@ -422,8 +425,6 @@ char *number(char *buf, char *end, unsigned long long num, int field_width = spec.field_width; int precision = spec.precision; - BUILD_BUG_ON(sizeof(struct printf_spec) != 8); - /* locase = 0 or 0x20. 
ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ locase = (spec.flags & SMALL); @@ -1930,7 +1931,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, * (legacy clock framework) of the clock * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address * (legacy clock framework) of the clock - * - 'Cr' For a clock, it prints the current rate of the clock * - 'G' For flags to be printed as a collection of symbolic strings that would * construct the specific value. Supported flags given by option: * p page flags (see struct page) given as pointer to unsigned long diff --git a/lib/xarray.c b/lib/xarray.c index 5f3f9311de89..6be3acbb861f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -57,6 +57,11 @@ static inline bool xa_track_free(const struct xarray *xa) return xa->xa_flags & XA_FLAGS_TRACK_FREE; } +static inline bool xa_zero_busy(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_ZERO_BUSY; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -232,6 +237,8 @@ void *xas_load(struct xa_state *xas) if (xas->xa_shift > node->shift) break; entry = xas_descend(xas, node); + if (node->shift == 0) + break; } return entry; } @@ -430,6 +437,8 @@ static void xas_shrink(struct xa_state *xas) break; if (!xa_is_node(entry) && node->shift) break; + if (xa_is_zero(entry) && xa_zero_busy(xa)) + entry = NULL; xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); @@ -506,7 +515,7 @@ static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) for (;;) { void *entry = xa_entry_locked(xas->xa, node, offset); - if (xa_is_node(entry)) { + if (node->shift && xa_is_node(entry)) { node = xa_to_node(entry); offset = 0; continue; @@ -604,6 +613,7 @@ static int xas_expand(struct xa_state *xas, void *head) /* * xas_create() - Create a slot to store an entry in. * @xas: XArray operation state. + * @allow_root: %true if we can store the entry in the root directly * * Most users will not need to call this function directly, as it is called * by xas_store(). It is useful for doing conditional store operations @@ -613,7 +623,7 @@ static int xas_expand(struct xa_state *xas, void *head) * If the slot was newly created, returns %NULL. If it failed to create the * slot, returns %NULL and indicates the error in @xas. 
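 * When @allow_root is false and the entry would otherwise land directly in
 * the root slot of an empty array, expansion is forced so the entry is
 * placed in an allocated node instead; xas_store() relies on this for
 * internal node and ZERO entries.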
*/ -static void *xas_create(struct xa_state *xas) +static void *xas_create(struct xa_state *xas, bool allow_root) { struct xarray *xa = xas->xa; void *entry; @@ -625,9 +635,13 @@ static void *xas_create(struct xa_state *xas) if (xas_top(node)) { entry = xa_head_locked(xa); xas->xa_node = NULL; + if (!entry && xa_zero_busy(xa)) + entry = XA_ZERO_ENTRY; shift = xas_expand(xas, entry); if (shift < 0) return NULL; + if (!shift && !allow_root) + shift = XA_CHUNK_SHIFT; entry = xa_head_locked(xa); slot = &xa->xa_head; } else if (xas_error(xas)) { @@ -687,7 +701,7 @@ void xas_create_range(struct xa_state *xas) xas->xa_sibs = 0; for (;;) { - xas_create(xas); + xas_create(xas, true); if (xas_error(xas)) goto restore; if (xas->xa_index <= (index | XA_CHUNK_MASK)) @@ -753,10 +767,12 @@ void *xas_store(struct xa_state *xas, void *entry) void *first, *next; bool value = xa_is_value(entry); - if (entry) - first = xas_create(xas); - else + if (entry) { + bool allow_root = !xa_is_node(entry) && !xa_is_zero(entry); + first = xas_create(xas, allow_root); + } else { first = xas_load(xas); + } if (xas_invalid(xas)) return first; @@ -786,7 +802,7 @@ void *xas_store(struct xa_state *xas, void *entry) * entry is set to NULL. */ rcu_assign_pointer(*slot, entry); - if (xa_is_node(next)) + if (xa_is_node(next) && (!node || node->shift)) xas_free_nodes(xas, xa_to_node(next)); if (!node) break; @@ -1251,35 +1267,6 @@ void *xas_find_conflict(struct xa_state *xas) EXPORT_SYMBOL_GPL(xas_find_conflict); /** - * xa_init_flags() - Initialise an empty XArray with flags. - * @xa: XArray. - * @flags: XA_FLAG values. - * - * If you need to initialise an XArray with special flags (eg you need - * to take the lock from interrupt context), use this function instead - * of xa_init(). - * - * Context: Any context. - */ -void xa_init_flags(struct xarray *xa, gfp_t flags) -{ - unsigned int lock_type; - static struct lock_class_key xa_lock_irq; - static struct lock_class_key xa_lock_bh; - - spin_lock_init(&xa->xa_lock); - xa->xa_flags = flags; - xa->xa_head = NULL; - - lock_type = xa_lock_type(xa); - if (lock_type == XA_LOCK_IRQ) - lockdep_set_class(&xa->xa_lock, &xa_lock_irq); - else if (lock_type == XA_LOCK_BH) - lockdep_set_class(&xa->xa_lock, &xa_lock_bh); -} -EXPORT_SYMBOL(xa_init_flags); - -/** * xa_load() - Load an entry from an XArray. * @xa: XArray. * @index: index into array. @@ -1308,7 +1295,6 @@ static void *xas_result(struct xa_state *xas, void *curr) { if (xa_is_zero(curr)) return NULL; - XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); if (xas_error(xas)) curr = xas->xa_node; return curr; @@ -1319,13 +1305,12 @@ static void *xas_result(struct xa_state *xas, void *curr) * @xa: XArray. * @index: Index into array. * - * If the entry at this index is a multi-index entry then all indices will - * be erased, and the entry will no longer be a multi-index entry. - * This function expects the xa_lock to be held on entry. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * - * Context: Any context. Expects xa_lock to be held on entry. May - * release and reacquire xa_lock if @gfp flags permit. - * Return: The old entry at this index. + * Context: Any context. Expects xa_lock to be held on entry. + * Return: The entry which used to be at this index. 
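+ *
+ * A minimal locked calling sketch (the names are illustrative only):
+ *
+ *	xa_lock(&xa);
+ *	old = __xa_erase(&xa, index);
+ *	xa_unlock(&xa);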
*/ void *__xa_erase(struct xarray *xa, unsigned long index) { @@ -1339,9 +1324,9 @@ EXPORT_SYMBOL(__xa_erase); * @xa: XArray. * @index: Index of entry. * - * This function is the equivalent of calling xa_store() with %NULL as - * the third argument. The XArray does not need to allocate memory, so - * the user does not need to provide GFP flags. + * After this function returns, loading from @index will return %NULL. + * If the index is part of a multi-index entry, all indices will be erased + * and none of the entries will be part of a multi-index entry. * * Context: Any context. Takes and releases the xa_lock. * Return: The entry which used to be at this index. @@ -1378,7 +1363,7 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) XA_STATE(xas, xa, index); void *curr; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); if (xa_track_free(xa) && !entry) entry = XA_ZERO_ENTRY; @@ -1444,18 +1429,14 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, XA_STATE(xas, xa, index); void *curr; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return XA_ERROR(-EINVAL); - if (xa_track_free(xa) && !entry) - entry = XA_ZERO_ENTRY; do { curr = xas_load(&xas); - if (curr == XA_ZERO_ENTRY) - curr = NULL; if (curr == old) { xas_store(&xas, entry); - if (xa_track_free(xa)) + if (xa_track_free(xa) && entry && !curr) xas_clear_mark(&xas, XA_FREE_MARK); } } while (__xas_nomem(&xas, gfp)); @@ -1465,40 +1446,45 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, EXPORT_SYMBOL(__xa_cmpxchg); /** - * __xa_reserve() - Reserve this index in the XArray. + * __xa_insert() - Store this entry in the XArray if no entry is present. * @xa: XArray. * @index: Index into array. + * @entry: New entry. * @gfp: Memory allocation flags. * - * Ensures there is somewhere to store an entry at @index in the array. - * If there is already something stored at @index, this function does - * nothing. If there was nothing there, the entry is marked as reserved. - * Loading from a reserved entry returns a %NULL pointer. - * - * If you do not use the entry that you have reserved, call xa_release() - * or xa_erase() to free any unnecessary memory. + * Inserting a NULL entry will store a reserved entry (like xa_reserve()) + * if no entry is present. Inserting will fail if a reserved entry is + * present, even though loading from this index will return NULL. * - * Context: Any context. Expects the xa_lock to be held on entry. May - * release the lock, sleep and reacquire the lock if the @gfp flags permit. - * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the store succeeded. -EBUSY if another entry was present. + * -ENOMEM if memory could not be allocated. 
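+ *
+ * A minimal locked calling sketch (names are illustrative; GFP_NOWAIT keeps
+ * the lock held across the call):
+ *
+ *	xa_lock(&xa);
+ *	err = __xa_insert(&xa, index, ptr, GFP_NOWAIT);
+ *	xa_unlock(&xa);
+ *
+ * where -EBUSY means @index was already occupied or reserved.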
*/ -int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) +int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) { XA_STATE(xas, xa, index); void *curr; + if (WARN_ON_ONCE(xa_is_advanced(entry))) + return -EINVAL; + if (!entry) + entry = XA_ZERO_ENTRY; + do { curr = xas_load(&xas); if (!curr) { - xas_store(&xas, XA_ZERO_ENTRY); + xas_store(&xas, entry); if (xa_track_free(xa)) xas_clear_mark(&xas, XA_FREE_MARK); + } else { + xas_set_err(&xas, -EBUSY); } } while (__xas_nomem(&xas, gfp)); return xas_error(&xas); } -EXPORT_SYMBOL(__xa_reserve); +EXPORT_SYMBOL(__xa_insert); #ifdef CONFIG_XARRAY_MULTI static void xas_set_range(struct xa_state *xas, unsigned long first, @@ -1567,7 +1553,7 @@ void *xa_store_range(struct xarray *xa, unsigned long first, if (last + 1) order = __ffs(last + 1); xas_set_order(&xas, last, order); - xas_create(&xas); + xas_create(&xas, true); if (xas_error(&xas)) goto unlock; } @@ -1591,25 +1577,25 @@ EXPORT_SYMBOL(xa_store_range); * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. * @id: Pointer to ID. - * @max: Maximum ID to allocate (inclusive). + * @limit: Range for allocated ID. * @entry: New entry. * @gfp: Memory allocation flags. * - * Allocates an unused ID in the range specified by @id and @max. - * Updates the @id pointer with the index, then stores the entry at that - * index. A concurrent lookup will not see an uninitialised @id. + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. * * Context: Any context. Expects xa_lock to be held on entry. May * release and reacquire xa_lock if @gfp flags permit. - * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if - * there is no more space in the XArray. + * Return: 0 on success, -ENOMEM if memory could not be allocated or + * -EBUSY if there are no free entries in @limit. */ -int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +int __xa_alloc(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, gfp_t gfp) { XA_STATE(xas, xa, 0); - int err; - if (WARN_ON_ONCE(xa_is_internal(entry))) + if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; if (WARN_ON_ONCE(!xa_track_free(xa))) return -EINVAL; @@ -1618,22 +1604,71 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) entry = XA_ZERO_ENTRY; do { - xas.xa_index = *id; - xas_find_marked(&xas, max, XA_FREE_MARK); + xas.xa_index = limit.min; + xas_find_marked(&xas, limit.max, XA_FREE_MARK); if (xas.xa_node == XAS_RESTART) - xas_set_err(&xas, -ENOSPC); + xas_set_err(&xas, -EBUSY); + else + *id = xas.xa_index; xas_store(&xas, entry); xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); - err = xas_error(&xas); - if (!err) - *id = xas.xa_index; - return err; + return xas_error(&xas); } EXPORT_SYMBOL(__xa_alloc); /** + * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @entry: New entry. + * @limit: Range of allocated ID. + * @next: Pointer to next ID to allocate. + * @gfp: Memory allocation flags. + * + * Finds an empty entry in @xa between @limit.min and @limit.max, + * stores the index into the @id pointer, then stores the entry at + * that index. A concurrent lookup will not see an uninitialised @id. 
+ * The search for an empty entry will start at @next and will wrap + * around if necessary. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 if the allocation succeeded without wrapping. 1 if the + * allocation succeeded after wrapping, -ENOMEM if memory could not be + * allocated or -EBUSY if there are no free entries in @limit. + */ +int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, + struct xa_limit limit, u32 *next, gfp_t gfp) +{ + u32 min = limit.min; + int ret; + + limit.min = max(min, *next); + ret = __xa_alloc(xa, id, entry, limit, gfp); + if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) { + xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED; + ret = 1; + } + + if (ret < 0 && limit.min > min) { + limit.min = min; + ret = __xa_alloc(xa, id, entry, limit, gfp); + if (ret == 0) + ret = 1; + } + + if (ret >= 0) { + *next = *id + 1; + if (*next == 0) + xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED; + } + return ret; +} +EXPORT_SYMBOL(__xa_alloc_cyclic); + +/** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. * @index: Index of entry. @@ -1927,6 +1962,8 @@ void xa_destroy(struct xarray *xa) entry = xa_head_locked(xa); RCU_INIT_POINTER(xa->xa_head, NULL); xas_init_marks(&xas); + if (xa_zero_busy(xa)) + xa_mark_clear(xa, XA_FREE_MARK); /* lockdep checks we're still holding the lock in xas_free_nodes() */ if (xa_is_node(entry)) xas_free_nodes(&xas, xa_to_node(entry)); |
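The xarray changes above rework the allocating-array API: xa_alloc() and the new xa_alloc_cyclic() take a struct xa_limit range instead of a bare maximum, and a full range is reported with -EBUSY instead of -ENOSPC. A minimal sketch of the new calling convention, using only interfaces that appear in this diff (the example_* names are illustrative, not part of the patch):

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(example_ids);	/* free-tracking array, IDs from 0 */

/* Allocate an unused index in 1..63 and store @item there. */
static int example_add(void *item, u32 *id)
{
	return xa_alloc(&example_ids, id, item, XA_LIMIT(1, 63), GFP_KERNEL);
}

/*
 * Cyclic variant: resume searching at *next and wrap around when the
 * upper limit is hit; returns 1 (not an error) if the search wrapped.
 */
static int example_add_cyclic(void *item, u32 *id, u32 *next)
{
	return xa_alloc_cyclic(&example_ids, id, item, xa_limit_32b,
			       next, GFP_KERNEL);
}

Both helpers return 0 on success, -ENOMEM if memory could not be allocated, and -EBUSY once no free index remains in the requested range.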