author     2018-05-02 00:38:52 -0700
committer  2018-05-02 00:38:52 -0700
commit     552c69b36ebd966186573b9c7a286b390935cce1 (patch)
tree       43ae55082805c8e7f900a63197d21de36937de16 /lib
parent     apparmor: fix memory leak on buffer on error exit path (diff)
parent     Linux v4.17-rc3 (diff)
Merge tag 'v4.17-rc3' into apparmor-next
Linux v4.17-rc3
Merge in v4.17 for LSM updates
Signed-off-by: John Johansen <john.johansen@canonical.com>
Diffstat (limited to 'lib')
68 files changed, 3567 insertions, 1263 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..5fe577673b98 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -55,6 +55,22 @@ config ARCH_USE_CMPXCHG_LOCKREF config ARCH_HAS_FAST_MULTIPLIER bool +config INDIRECT_PIO + bool "Access I/O in non-MMIO mode" + depends on ARM64 + help + On some platforms where no separate I/O space exists, there are I/O + hosts which can not be accessed in MMIO mode. Using the logical PIO + mechanism, the host-local I/O resource can be mapped into system + logic PIO space shared with MMIO hosts, such as PCI/PCIe, then the + system can access the I/O devices with the mapped-logic PIO through + I/O accessors. + + This way has relatively little I/O performance cost. Please make + sure your devices really need this configure item enabled. + + When in doubt, say N. + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -409,7 +425,11 @@ config HAS_DMA depends on !NO_DMA default y -config DMA_NOOP_OPS +config SGL_ALLOC + bool + default n + +config DMA_DIRECT_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) default n diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9d5b78aad4c5..c40c7b734cd1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -165,7 +165,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO && !FRV + depends on DEBUG_INFO help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 0 if KASAN + default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) @@ -324,11 +324,11 @@ config DEBUG_SECTION_MISMATCH the analysis would not catch the illegal reference. This option tells gcc to inline less (but it does result in a larger kernel). - - Run the section mismatch analysis for each module/built-in.o file. + - Run the section mismatch analysis for each module/built-in.a file. When we run the section mismatch analysis on vmlinux.o, we lose valuable information about where the mismatch was introduced. - Running the analysis for each module/built-in.o file + Running the analysis for each module/built-in.a file tells where the mismatch happens much closer to the source. The drawback is that the same mismatch is reported at least twice. @@ -351,14 +351,10 @@ config SECTION_MISMATCH_WARN_ONLY # config ARCH_WANT_FRAME_POINTERS bool - help config FRAME_POINTER bool "Compile the kernel with frame pointers" - depends on DEBUG_KERNEL && \ - (CRIS || M68K || FRV || UML || \ - SUPERH || BLACKFIN || MN10300 || METAG) || \ - ARCH_WANT_FRAME_POINTERS + depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS help If you say Y here the resulting kernel image will be slightly @@ -804,6 +800,30 @@ config SOFTLOCKUP_DETECTOR chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on SOFTLOCKUP_DETECTOR + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 20 seconds (configurable using the watchdog_thresh + sysctl), without giving other tasks a chance to run. 
+ + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on SOFTLOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR @@ -853,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE default 0 if !BOOTPARAM_HARDLOCKUP_PANIC default 1 if BOOTPARAM_HARDLOCKUP_PANIC -config BOOTPARAM_SOFTLOCKUP_PANIC - bool "Panic (Reboot) On Soft Lockups" - depends on SOFTLOCKUP_DETECTOR - help - Say Y here to enable the kernel to panic on "soft lockups", - which are bugs that cause the kernel to loop in kernel - mode for more than 20 seconds (configurable using the watchdog_thresh - sysctl), without giving other tasks a chance to run. - - The panic can be used in combination with panic_timeout, - to cause the system to reboot automatically after a - lockup has been detected. This feature is useful for - high-availability systems that have uptime guarantees and - where a lockup must be resolved ASAP. - - Say N if unsure. - -config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE - int - depends on SOFTLOCKUP_DETECTOR - range 0 1 - default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC - default 1 if BOOTPARAM_SOFTLOCKUP_PANIC - config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL @@ -1035,69 +1031,20 @@ config DEBUG_PREEMPT menu "Lock Debugging (spinlocks, mutexes, etc...)" -config DEBUG_RT_MUTEXES - bool "RT Mutex debugging, deadlock detection" - depends on DEBUG_KERNEL && RT_MUTEXES - help - This allows rt mutex semantics violations and rt mutex related - deadlocks (lockups) to be detected and reported automatically. - -config DEBUG_SPINLOCK - bool "Spinlock and rw-lock debugging: basic checks" - depends on DEBUG_KERNEL - select UNINLINE_SPIN_UNLOCK - help - Say Y here and build SMP to catch missing spinlock initialization - and certain other kinds of spinlock errors commonly made. This is - best used in conjunction with the NMI watchdog so that spinlock - deadlocks are also debuggable. - -config DEBUG_MUTEXES - bool "Mutex debugging: basic checks" - depends on DEBUG_KERNEL - help - This feature allows mutex semantics violations to be detected and - reported. - -config DEBUG_WW_MUTEX_SLOWPATH - bool "Wait/wound mutex debugging: Slowpath testing" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT - select DEBUG_LOCK_ALLOC - select DEBUG_SPINLOCK - select DEBUG_MUTEXES - help - This feature enables slowpath testing for w/w mutex users by - injecting additional -EDEADLK wound/backoff cases. Together with - the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this - will test all possible w/w mutex interface abuse with the - exception of simply not acquiring all the required locks. - Note that this feature can introduce significant overhead, so - it really should not be enabled in a production or distro kernel, - even a debug kernel. If you are a driver writer, enable it. If - you are a distro, do not. 
- -config DEBUG_LOCK_ALLOC - bool "Lock debugging: detect incorrect freeing of live locks" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT - select DEBUG_SPINLOCK - select DEBUG_MUTEXES - select DEBUG_RT_MUTEXES if RT_MUTEXES - select LOCKDEP - help - This feature will check whether any held lock (spinlock, rwlock, - mutex or rwsem) is incorrectly freed by the kernel, via any of the - memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), - vfree(), etc.), whether a live lock is incorrectly reinitialized via - spin_lock_init()/mutex_init()/etc., or whether there is any lock - held during task exit. +config LOCK_DEBUGGING_SUPPORT + bool + depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + default y config PROVE_LOCKING bool "Lock debugging: prove locking correctness" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES + select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER + select DEBUG_WW_MUTEX_SLOWPATH select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS default n @@ -1135,20 +1082,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. -config LOCKDEP - bool - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT - select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE && !X86 - select KALLSYMS - select KALLSYMS_ALL - -config LOCKDEP_SMALL - bool - config LOCK_STAT bool "Lock usage statistics" - depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES @@ -1168,6 +1104,80 @@ config LOCK_STAT CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. (CONFIG_LOCKDEP defines "acquire" and "release" events.) +config DEBUG_RT_MUTEXES + bool "RT Mutex debugging, deadlock detection" + depends on DEBUG_KERNEL && RT_MUTEXES + help + This allows rt mutex semantics violations and rt mutex related + deadlocks (lockups) to be detected and reported automatically. + +config DEBUG_SPINLOCK + bool "Spinlock and rw-lock debugging: basic checks" + depends on DEBUG_KERNEL + select UNINLINE_SPIN_UNLOCK + help + Say Y here and build SMP to catch missing spinlock initialization + and certain other kinds of spinlock errors commonly made. This is + best used in conjunction with the NMI watchdog so that spinlock + deadlocks are also debuggable. + +config DEBUG_MUTEXES + bool "Mutex debugging: basic checks" + depends on DEBUG_KERNEL + help + This feature allows mutex semantics violations to be detected and + reported. + +config DEBUG_WW_MUTEX_SLOWPATH + bool "Wait/wound mutex debugging: Slowpath testing" + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT + select DEBUG_LOCK_ALLOC + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + help + This feature enables slowpath testing for w/w mutex users by + injecting additional -EDEADLK wound/backoff cases. Together with + the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this + will test all possible w/w mutex interface abuse with the + exception of simply not acquiring all the required locks. 
+ Note that this feature can introduce significant overhead, so + it really should not be enabled in a production or distro kernel, + even a debug kernel. If you are a driver writer, enable it. If + you are a distro, do not. + +config DEBUG_RWSEMS + bool "RW Semaphore debugging: basic checks" + depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER + help + This debugging feature allows mismatched rw semaphore locks and unlocks + to be detected and reported. + +config DEBUG_LOCK_ALLOC + bool "Lock debugging: detect incorrect freeing of live locks" + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES + select LOCKDEP + help + This feature will check whether any held lock (spinlock, rwlock, + mutex or rwsem) is incorrectly freed by the kernel, via any of the + memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), + vfree(), etc.), whether a live lock is incorrectly reinitialized via + spin_lock_init()/mutex_init()/etc., or whether there is any lock + held during task exit. + +config LOCKDEP + bool + depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT + select STACKTRACE + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86 + select KALLSYMS + select KALLSYMS_ALL + +config LOCKDEP_SMALL + bool + config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP @@ -1500,6 +1510,10 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION @@ -1547,6 +1561,16 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAIL_FUNCTION + bool "Fault-injection capability for functions" + depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION + help + Provide function-based fault-injection capability. + This will allow you to override a specific function with a return + with given return value. As a result, function caller will see + an error value and have to handle it. This is useful to test the + error handling in various subsystems. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS @@ -1558,7 +1582,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE && !X86 + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 help Provide stacktrace filter for fault-injection capabilities @@ -1627,7 +1651,11 @@ config DMA_API_DEBUG If unsure, say N. -menu "Runtime Testing" +menuconfig RUNTIME_TESTING_MENU + bool "Runtime Testing" + def_bool y + +if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" @@ -1827,7 +1855,7 @@ config TEST_BPF If unsure, say N. -config TEST_FIND_BIT +config FIND_BIT_BENCHMARK tristate "Test find_bit functions" default n help @@ -1915,7 +1943,7 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. 
-endmenu # runtime tests +endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" @@ -1952,7 +1980,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if TILE || PPC + default y if PPC || X86 || ARM64 ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index bd38aab05929..3d35d062970d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -20,6 +20,17 @@ config KASAN Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_EXTRA + bool "KAsan: extra checks" + depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST + help + This enables further checks in the kernel address sanitizer, for now + it only includes the address-use-after-scope check that can lead + to excessive kernel stack usage, frame size warnings and longer + compile time. + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more + + choice prompt "Instrumentation type" depends on KASAN diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index a669c193b878..19d42ea75ec2 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -46,3 +46,10 @@ config UBSAN_NULL help This option enables detection of memory accesses via a null pointer. + +config TEST_UBSAN + tristate "Module for testing for undefined behavior detection" + depends on m && UBSAN + help + This is a test module for UBSAN. + It triggers various undefined behavior, and detect it. diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8ffd..ce20696d5a92 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o dump_stack.o timerqueue.o\ + rbtree.o radix-tree.o timerqueue.o\ idr.o int_sqrt.o extable.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ @@ -26,9 +26,10 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o siphash.o \ nmi_backtrace.o nodemask.o win_minmax.o +lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o +lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o @@ -39,19 +40,22 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o +CFLAGS_test_kasan.o += -fno-builtin +obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o +UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o 
obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o @@ -81,6 +85,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-y += logic_pio.o + obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o @@ -149,6 +155,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c index b77d51da8c73..c6659cb37033 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -38,12 +38,10 @@ begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); slot = 0; /* We perform two passes of each node. @@ -55,15 +53,12 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { - /* We need a barrier between the read of the pointer - * and dereferencing the pointer - but only if we are - * actually going to dereference it. + /* We need a barrier between the read of the pointer, + * which is supplied by the above READ_ONCE(). */ - smp_read_barrier_depends(); - /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); @@ -86,10 +81,8 @@ begin_node: continue_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,16 +91,15 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = READ_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); /* Address dependency. */ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); - smp_read_barrier_depends(); cursor = parent; - parent = READ_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +139,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = READ_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; @@ -194,7 +186,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = READ_ONCE(array->root); + cursor = READ_ONCE(array->root); /* Address dependency. 
*/ if (!cursor) return assoc_array_walk_tree_empty; @@ -216,11 +208,9 @@ jumped: consider_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -254,7 +244,6 @@ consider_node: cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); @@ -294,7 +283,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -331,20 +320,18 @@ void *assoc_array_find(const struct assoc_array *array, return NULL; node = result.terminal_node.node; - smp_read_barrier_depends(); /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); - smp_read_barrier_depends(); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } diff --git a/lib/bitmap.c b/lib/bitmap.c index d8f0c094b18e..a42eff7e8c48 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -607,7 +607,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, /* if no digit is after '-', it's wrong*/ if (at_start && in_range) return -EINVAL; - if (!(a <= b) || !(used_size <= group_size)) + if (!(a <= b) || group_size == 0 || !(used_size <= group_size)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; @@ -1106,111 +1106,80 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) EXPORT_SYMBOL(bitmap_allocate_region); /** - * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap - * @bitmap: array of unsigned longs, the destination bitmap, non NULL - * @nbits: number of bits in @bitmap - * @buf: array of u32 (in host byte order), the source bitmap, non NULL - * @nwords: number of u32 words in @buf - * - * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining - * bits between nword and nbits in @bitmap (if any) are cleared. In - * last word of @bitmap, the bits beyond nbits (if any) are kept - * unchanged. + * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. + * @dst: destination buffer + * @src: bitmap to copy + * @nbits: number of bits in the bitmap * - * Return the number of bits effectively copied. + * Require nbits % BITS_PER_LONG == 0. 
*/ -unsigned int -bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, - const u32 *buf, unsigned int nwords) +#ifdef __BIG_ENDIAN +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned int dst_idx, src_idx; - - for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { - unsigned long part = 0; - - if (src_idx < nwords) - part = buf[src_idx++]; - -#if BITS_PER_LONG == 64 - if (src_idx < nwords) - part |= ((unsigned long) buf[src_idx++]) << 32; -#endif - - if (dst_idx < nbits/BITS_PER_LONG) - bitmap[dst_idx] = part; - else { - unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + unsigned int i; - bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) - | (part & mask); - } + for (i = 0; i < nbits/BITS_PER_LONG; i++) { + if (BITS_PER_LONG == 64) + dst[i] = cpu_to_le64(src[i]); + else + dst[i] = cpu_to_le32(src[i]); } - - return min_t(unsigned int, nbits, 32*nwords); } -EXPORT_SYMBOL(bitmap_from_u32array); +EXPORT_SYMBOL(bitmap_copy_le); +#endif +#if BITS_PER_LONG == 64 /** - * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits - * @buf: array of u32 (in host byte order), the dest bitmap, non NULL - * @nwords: number of u32 words in @buf - * @bitmap: array of unsigned longs, the source bitmap, non NULL + * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap + * @buf: array of u32 (in host byte order), the source bitmap * @nbits: number of bits in @bitmap - * - * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining - * bits after nbits in @buf (if any) are cleared. - * - * Return the number of bits effectively copied. */ -unsigned int -bitmap_to_u32array(u32 *buf, unsigned int nwords, - const unsigned long *bitmap, unsigned int nbits) +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits) { - unsigned int dst_idx = 0, src_idx = 0; - - while (dst_idx < nwords) { - unsigned long part = 0; - - if (src_idx < BITS_TO_LONGS(nbits)) { - part = bitmap[src_idx]; - if (src_idx >= nbits/BITS_PER_LONG) - part &= BITMAP_LAST_WORD_MASK(nbits); - src_idx++; - } + unsigned int i, halfwords; - buf[dst_idx++] = part & 0xffffffffUL; + if (!nbits) + return; -#if BITS_PER_LONG == 64 - if (dst_idx < nwords) { - part >>= 32; - buf[dst_idx++] = part & 0xffffffffUL; - } -#endif + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + bitmap[i/2] = (unsigned long) buf[i]; + if (++i < halfwords) + bitmap[i/2] |= ((unsigned long) buf[i]) << 32; } - return min_t(unsigned int, nbits, 32*nwords); + /* Clear tail bits in last word beyond nbits. */ + if (nbits % BITS_PER_LONG) + bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits); } -EXPORT_SYMBOL(bitmap_to_u32array); +EXPORT_SYMBOL(bitmap_from_arr32); /** - * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. - * @dst: destination buffer - * @src: bitmap to copy - * @nbits: number of bits in the bitmap - * - * Require nbits % BITS_PER_LONG == 0. 
+ * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap + * @bitmap: array of unsigned longs, the source bitmap + * @nbits: number of bits in @bitmap */ -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) { - unsigned int i; + unsigned int i, halfwords; - for (i = 0; i < nbits/BITS_PER_LONG; i++) { - if (BITS_PER_LONG == 64) - dst[i] = cpu_to_le64(src[i]); - else - dst[i] = cpu_to_le32(src[i]); + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + buf[i] = (u32) (bitmap[i/2] & UINT_MAX); + if (++i < halfwords) + buf[i] = (u32) (bitmap[i/2] >> 32); } + + /* Clear tail bits in last element of array beyond nbits. */ + if (nbits % BITS_PER_LONG) + buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); } -EXPORT_SYMBOL(bitmap_copy_le); +EXPORT_SYMBOL(bitmap_to_arr32); + #endif diff --git a/lib/btree.c b/lib/btree.c index f93a945274af..590facba2c50 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -3,7 +3,7 @@ * * As should be obvious for Linux kernel code, license is GPLv2 * - * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org> + * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com> * Bits and pieces stolen from Peter Zijlstra's code, which is * Copyright 2007, Red Hat Inc. Peter Zijlstra * GPLv2 @@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = { }; EXPORT_SYMBOL_GPL(btree_geo128); +#define MAX_KEYLEN (2 * LONG_PER_U64) + static struct kmem_cache *btree_cachep; void *btree_alloc(gfp_t gfp_mask, void *pool_data) @@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, { int i, height; unsigned long *node, *oldnode; - unsigned long *retry_key = NULL, key[geo->keylen]; + unsigned long *retry_key = NULL, key[MAX_KEYLEN]; if (keyzero(geo, __key)) return NULL; @@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove); int btree_merge(struct btree_head *target, struct btree_head *victim, struct btree_geo *geo, gfp_t gfp) { - unsigned long key[geo->keylen]; - unsigned long dup[geo->keylen]; + unsigned long key[MAX_KEYLEN]; + unsigned long dup[MAX_KEYLEN]; void *val; int err; diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c new file mode 100644 index 000000000000..266a97c5708b --- /dev/null +++ b/lib/bucket_locks.c @@ -0,0 +1,54 @@ +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/* Allocate an array of spinlocks to be accessed by a hash. Two arguments + * indicate the number of elements to allocate in the array. max_size + * gives the maximum number of elements to allocate. cpu_mult gives + * the number of locks per CPU to allocate. The size is rounded up + * to a power of 2 to be suitable as a hash table. 
+ */ + +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, + size_t max_size, unsigned int cpu_mult, gfp_t gfp) +{ + spinlock_t *tlocks = NULL; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + if (cpu_mult) { + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); + size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size); + } else { + size = max_size; + } + + if (sizeof(spinlock_t) != 0) { + if (gfpflags_allow_blocking(gfp)) + tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); + else + tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); + if (!tlocks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tlocks[i]); + } + + *locks = tlocks; + *locks_mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(alloc_bucket_spinlocks); + +void free_bucket_spinlocks(spinlock_t *locks) +{ + kvfree(locks); +} +EXPORT_SYMBOL(free_bucket_spinlocks); diff --git a/lib/bug.c b/lib/bug.c index c1b0fad31b10..1077366f496b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_NONE; bug = find_bug(bugaddr); + if (!bug) + return BUG_TRAP_TYPE_NONE; file = NULL; line = 0; @@ -191,7 +193,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (file) pr_crit("kernel BUG at %s:%u!\n", file, line); else - pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n", + pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", (void *)bugaddr); return BUG_TRAP_TYPE_BUG; diff --git a/lib/chacha20.c b/lib/chacha20.c index 250ceed9ec9a..c1cc50fb68c9 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,12 +16,7 @@ #include <asm/unaligned.h> #include <crypto/chacha20.h> -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - -extern void chacha20_block(u32 *state, void *stream) +void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; int i; @@ -30,45 +25,45 @@ extern void chacha20_block(u32 *state, void *stream) x[i] = state[i]; for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 
7); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } for (i = 0; i < ARRAY_SIZE(x); i++) diff --git a/lib/cpumask.c b/lib/cpumask.c index 35fe142ebb5e..beca6244671a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -33,10 +33,11 @@ EXPORT_SYMBOL(cpumask_next); int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) - if (cpumask_test_cpu(n, src2p)) - break; - return n; + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, n + 1); } EXPORT_SYMBOL(cpumask_next_and); diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c index 7f6dd68d2d09..d873b34039ff 100644 --- a/lib/crc-ccitt.c +++ b/lib/crc-ccitt.c @@ -51,8 +51,49 @@ u16 const crc_ccitt_table[256] = { }; EXPORT_SYMBOL(crc_ccitt_table); +/* + * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE + * Reflected bits order, does not augment final value. 
+ */ +u16 const crc_ccitt_false_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; +EXPORT_SYMBOL(crc_ccitt_false_table); + /** - * crc_ccitt - recompute the CRC for the data buffer + * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data + * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer @@ -65,5 +106,20 @@ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) } EXPORT_SYMBOL(crc_ccitt); +/** + * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant) + * for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt_false(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_false_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt_false); + MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 2f5349c6e81a..994be4805cec 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -42,14 +42,18 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); static HLIST_HEAD(obj_pool); +static HLIST_HEAD(obj_to_free); static int obj_pool_min_free = ODEBUG_POOL_SIZE; static int obj_pool_free = ODEBUG_POOL_SIZE; static int obj_pool_used; static int obj_pool_max_used; +/* The number of objs on the global free list */ +static int obj_nr_tofree; static struct 
kmem_cache *obj_cache; static int debug_objects_maxchain __read_mostly; +static int __maybe_unused debug_objects_maxchecked __read_mostly; static int debug_objects_fixups __read_mostly; static int debug_objects_warnings __read_mostly; static int debug_objects_enabled __read_mostly @@ -96,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { static void fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; - struct debug_obj *new; + struct debug_obj *new, *obj; unsigned long flags; if (likely(obj_pool_free >= debug_objects_pool_min_level)) return; + /* + * Reuse objs from the global free list; they will be reinitialized + * when allocating. + */ + while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { + raw_spin_lock_irqsave(&pool_lock, flags); + /* + * Recheck with the lock held as the worker thread might have + * won the race and freed the global free list already. + */ + if (obj_nr_tofree) { + obj = hlist_entry(obj_to_free.first, typeof(*obj), node); + hlist_del(&obj->node); + obj_nr_tofree--; + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + } + raw_spin_unlock_irqrestore(&pool_lock, flags); + } + if (unlikely(!obj_cache)) return; @@ -177,62 +201,76 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) * workqueue function to free objects. * * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. We also free - * the objects in a batch of 4 for each lock/unlock cycle. + * debug objects will be delayed if the pool_lock is busy. */ -#define ODEBUG_FREE_BATCH 4 - static void free_obj_work(struct work_struct *work) { - struct debug_obj *objs[ODEBUG_FREE_BATCH]; + struct hlist_node *tmp; + struct debug_obj *obj; unsigned long flags; - int i; + HLIST_HEAD(tofree); if (!raw_spin_trylock_irqsave(&pool_lock, flags)) return; - while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) { - for (i = 0; i < ODEBUG_FREE_BATCH; i++) { - objs[i] = hlist_entry(obj_pool.first, - typeof(*objs[0]), node); - hlist_del(&objs[i]->node); - } - obj_pool_free -= ODEBUG_FREE_BATCH; - debug_objects_freed += ODEBUG_FREE_BATCH; - /* - * We release pool_lock across kmem_cache_free() to - * avoid contention on pool_lock. - */ - raw_spin_unlock_irqrestore(&pool_lock, flags); - for (i = 0; i < ODEBUG_FREE_BATCH; i++) - kmem_cache_free(obj_cache, objs[i]); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; + /* + * The objs on the pool list might be allocated before the work is + * run, so recheck if pool list it full or not, if not fill pool + * list from the global free list + */ + while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { + obj = hlist_entry(obj_to_free.first, typeof(*obj), node); + hlist_del(&obj->node); + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + obj_nr_tofree--; + } + + /* + * Pool list is already full and there are still objs on the free + * list. Move remaining free objs to a temporary list to free the + * memory outside the pool_lock held region. + */ + if (obj_nr_tofree) { + hlist_move_list(&obj_to_free, &tofree); + debug_objects_freed += obj_nr_tofree; + obj_nr_tofree = 0; } raw_spin_unlock_irqrestore(&pool_lock, flags); + + hlist_for_each_entry_safe(obj, tmp, &tofree, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } } -/* - * Put the object back into the pool and schedule work to free objects - * if necessary. 
- */ -static void free_object(struct debug_obj *obj) +static bool __free_object(struct debug_obj *obj) { unsigned long flags; - int sched = 0; + bool work; raw_spin_lock_irqsave(&pool_lock, flags); - /* - * schedule work when the pool is filled and the cache is - * initialized: - */ - if (obj_pool_free > debug_objects_pool_size && obj_cache) - sched = 1; - hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; + work = (obj_pool_free > debug_objects_pool_size) && obj_cache; obj_pool_used--; + + if (work) { + obj_nr_tofree++; + hlist_add_head(&obj->node, &obj_to_free); + } else { + obj_pool_free++; + hlist_add_head(&obj->node, &obj_pool); + } raw_spin_unlock_irqrestore(&pool_lock, flags); - if (sched) + return work; +} + +/* + * Put the object back into the pool and schedule work to free objects + * if necessary. + */ +static void free_object(struct debug_obj *obj) +{ + if (__free_object(obj)) schedule_work(&debug_obj_work); } @@ -714,13 +752,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state); static void __debug_check_no_obj_freed(const void *address, unsigned long size) { unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; - struct hlist_node *tmp; - HLIST_HEAD(freelist); struct debug_obj_descr *descr; enum debug_obj_state state; struct debug_bucket *db; + struct hlist_node *tmp; struct debug_obj *obj; - int cnt; + int cnt, objs_checked = 0; + bool work = false; saddr = (unsigned long) address; eaddr = saddr + size; @@ -751,21 +789,24 @@ repeat: goto repeat; default: hlist_del(&obj->node); - hlist_add_head(&obj->node, &freelist); + work |= __free_object(obj); break; } } raw_spin_unlock_irqrestore(&db->lock, flags); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } - if (cnt > debug_objects_maxchain) debug_objects_maxchain = cnt; + + objs_checked += cnt; } + + if (objs_checked > debug_objects_maxchecked) + debug_objects_maxchecked = objs_checked; + + /* Schedule work to actually kmem_cache_free() objects */ + if (work) + schedule_work(&debug_obj_work); } void debug_check_no_obj_freed(const void *address, unsigned long size) @@ -780,12 +821,14 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); + seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); seq_printf(m, "warnings :%d\n", debug_objects_warnings); seq_printf(m, "fixups :%d\n", debug_objects_fixups); seq_printf(m, "pool_free :%d\n", obj_pool_free); seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + seq_printf(m, "on_free_list :%d\n", obj_nr_tofree); seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; diff --git a/lib/devres.c b/lib/devres.c index 5f2aedd58bc5..5bec1120b392 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -5,6 +5,12 @@ #include <linux/gfp.h> #include <linux/export.h> +enum devm_ioremap_type { + DEVM_IOREMAP = 0, + DEVM_IOREMAP_NC, + DEVM_IOREMAP_WC, +}; + void devm_ioremap_release(struct device *dev, void *res) { iounmap(*(void __iomem **)res); @@ -15,24 +21,28 @@ static int devm_ioremap_match(struct device *dev, void *res, void *match_data) return *(void **)res == match_data; } -/** - * devm_ioremap - Managed ioremap() - * @dev: Generic device to remap IO address for - * @offset: Resource address to 
map - * @size: Size of map - * - * Managed ioremap(). Map is automatically unmapped on driver detach. - */ -void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, - resource_size_t size) +static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset, + resource_size_t size, + enum devm_ioremap_type type) { - void __iomem **ptr, *addr; + void __iomem **ptr, *addr = NULL; ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); if (!ptr) return NULL; - addr = ioremap(offset, size); + switch (type) { + case DEVM_IOREMAP: + addr = ioremap(offset, size); + break; + case DEVM_IOREMAP_NC: + addr = ioremap_nocache(offset, size); + break; + case DEVM_IOREMAP_WC: + addr = ioremap_wc(offset, size); + break; + } + if (addr) { *ptr = addr; devres_add(dev, ptr); @@ -41,6 +51,20 @@ void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, return addr; } + +/** + * devm_ioremap - Managed ioremap() + * @dev: Generic device to remap IO address for + * @offset: Resource address to map + * @size: Size of map + * + * Managed ioremap(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, + resource_size_t size) +{ + return __devm_ioremap(dev, offset, size, DEVM_IOREMAP); +} EXPORT_SYMBOL(devm_ioremap); /** @@ -55,20 +79,7 @@ EXPORT_SYMBOL(devm_ioremap); void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, resource_size_t size) { - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_nocache(offset, size); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; + return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NC); } EXPORT_SYMBOL(devm_ioremap_nocache); @@ -83,20 +94,7 @@ EXPORT_SYMBOL(devm_ioremap_nocache); void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, resource_size_t size) { - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_wc(offset, size); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; + return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_WC); } EXPORT_SYMBOL(devm_ioremap_wc); diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 1b34d210452c..7f5cdc1e6b29 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, if (unlikely(virt == NULL)) return; - entry = dma_entry_alloc(); - if (!entry) + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + entry = dma_entry_alloc(); + if (!entry) return; entry->type = dma_debug_coherent; @@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, }; /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; if (is_vmalloc_addr(virt)) diff --git a/lib/dma-direct.c b/lib/dma-direct.c new file mode 100644 index 000000000000..bbfb229aa067 --- /dev/null +++ b/lib/dma-direct.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations that map physical memory directly without using an IOMMU or + * flushing caches. 
+ */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-direct.h> +#include <linux/scatterlist.h> +#include <linux/dma-contiguous.h> +#include <linux/pfn.h> +#include <linux/set_memory.h> + +#define DIRECT_MAPPING_ERROR 0 + +/* + * Most architectures use ZONE_DMA for the first 16 Megabytes, but + * some use it for entirely different regions: + */ +#ifndef ARCH_ZONE_DMA_BITS +#define ARCH_ZONE_DMA_BITS 24 +#endif + +/* + * For AMD SEV all DMA must be to unencrypted addresses. + */ +static inline bool force_dma_unencrypted(void) +{ + return sev_active(); +} + +static bool +check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, + const char *caller) +{ + if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + dev_err(dev, + "%s: overflow %pad+%zu of device mask %llx\n", + caller, &dma_addr, size, *dev->dma_mask); + } + return false; + } + return true; +} + +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +{ + dma_addr_t addr = force_dma_unencrypted() ? + __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); + return addr + size - 1 <= dev->coherent_dma_mask; +} + +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int page_order = get_order(size); + struct page *page = NULL; + void *ret; + + /* we always manually zero the memory once we are done: */ + gfp &= ~__GFP_ZERO; + + /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ + if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + gfp |= GFP_DMA; + if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; + +again: + /* CMA can be used only in the context which permits sleeping */ + if (gfpflags_allow_blocking(gfp)) { + page = dma_alloc_from_contiguous(dev, count, page_order, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, page_order); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __free_pages(page, page_order); + page = NULL; + + if (IS_ENABLED(CONFIG_ZONE_DMA) && + dev->coherent_dma_mask < DMA_BIT_MASK(32) && + !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + + if (!page) + return NULL; + ret = page_address(page); + if (force_dma_unencrypted()) { + set_memory_decrypted((unsigned long)ret, 1 << page_order); + *dma_handle = __phys_to_dma(dev, page_to_phys(page)); + } else { + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + } + memset(ret, 0, size); + return ret; +} + +/* + * NOTE: this function must never look at the dma_addr argument, because we want + * to be able to use it as a helper for iommu implementations as well. 
+ */ +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned int page_order = get_order(size); + + if (force_dma_unencrypted()) + set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); + if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) + free_pages((unsigned long)cpu_addr, page_order); +} + +static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + + if (!check_addr(dev, dma_addr, size, __func__)) + return DIRECT_MAPPING_ERROR; + return dma_addr; +} + +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); + if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + return 0; + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +int dma_direct_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_ZONE_DMA + if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return 0; +#else + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32. If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask < DMA_BIT_MASK(32)) + return 0; +#endif + return 1; +} + +static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DIRECT_MAPPING_ERROR; +} + +const struct dma_map_ops dma_direct_ops = { + .alloc = dma_direct_alloc, + .free = dma_direct_free, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, + .is_phys = 1, +}; +EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noop.c b/lib/dma-noop.c deleted file mode 100644 index a10185b0c2d4..000000000000 --- a/lib/dma-noop.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-noop.c - * - * DMA operations that map to physical addresses without flushing memory. 
- */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> -#include <linux/pfn.h> - -static void *dma_noop_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); - - return ret; -} - -static void dma_noop_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); -} - -static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); - void *va; - - BUG_ON(!sg_page(sg)); - va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_noop_ops = { - .alloc = dma_noop_alloc, - .free = dma_noop_free, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, -}; - -EXPORT_SYMBOL(dma_noop_ops); diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c5edbedd364d..5cff72f18c4a 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -10,6 +10,66 @@ #include <linux/sched/debug.h> #include <linux/smp.h> #include <linux/atomic.h> +#include <linux/kexec.h> +#include <linux/utsname.h> + +static char dump_stack_arch_desc_str[128]; + +/** + * dump_stack_set_arch_desc - set arch-specific str to show with task dumps + * @fmt: printf-style format string + * @...: arguments for the format string + * + * The configured string will be printed right after utsname during task + * dumps. Usually used to add arch-specific system identifiers. If an + * arch wants to make use of such an ID string, it should initialize this + * as soon as possible during boot. + */ +void __init dump_stack_set_arch_desc(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(dump_stack_arch_desc_str, sizeof(dump_stack_arch_desc_str), + fmt, args); + va_end(args); +} + +/** + * dump_stack_print_info - print generic debug info for dump_stack() + * @log_lvl: log level + * + * Arch-specific dump_stack() implementations can use this function to + * print out the same debug information as the generic dump_stack(). + */ +void dump_stack_print_info(const char *log_lvl) +{ + printk("%sCPU: %d PID: %d Comm: %.20s %s%s %s %.*s\n", + log_lvl, raw_smp_processor_id(), current->pid, current->comm, + kexec_crash_loaded() ? "Kdump: loaded " : "", + print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + if (dump_stack_arch_desc_str[0] != '\0') + printk("%sHardware name: %s\n", + log_lvl, dump_stack_arch_desc_str); + + print_worker_info(log_lvl, current); +} + +/** + * show_regs_print_info - print generic debug info for show_regs() + * @log_lvl: log level + * + * show_regs() implementations can use this function to print out generic + * debug information. 
+ */ +void show_regs_print_info(const char *log_lvl) +{ + dump_stack_print_info(log_lvl); +} static void __dump_stack(void) { diff --git a/lib/error-inject.c b/lib/error-inject.c new file mode 100644 index 000000000000..c0d4600f4896 --- /dev/null +++ b/lib/error-inject.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +// error-inject.c: Function-level error injection table +#include <linux/error-injection.h> +#include <linux/debugfs.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/slab.h> + +/* Whitelist of symbols that can be overridden for error injection. */ +static LIST_HEAD(error_injection_list); +static DEFINE_MUTEX(ei_mutex); +struct ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + int etype; + void *priv; +}; + +bool within_error_injection_list(unsigned long addr) +{ + struct ei_entry *ent; + bool ret = false; + + mutex_lock(&ei_mutex); + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) { + ret = true; + break; + } + } + mutex_unlock(&ei_mutex); + return ret; +} + +int get_injectable_error_type(unsigned long addr) +{ + struct ei_entry *ent; + + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return ent->etype; + } + return EI_ETYPE_NONE; +} + +/* + * Lookup and populate the error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. + */ +static void populate_error_injection_list(struct error_injection_entry *start, + struct error_injection_entry *end, + void *priv) +{ + struct error_injection_entry *iter; + struct ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)iter->addr); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->etype = iter->etype; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &error_injection_list); + } + mutex_unlock(&ei_mutex); +} + +/* Markers of the _error_inject_whitelist section */ +extern struct error_injection_entry __start_error_injection_whitelist[]; +extern struct error_injection_entry __stop_error_injection_whitelist[]; + +static void __init populate_kernel_ei_list(void) +{ + populate_error_injection_list(__start_error_injection_whitelist, + __stop_error_injection_whitelist, + NULL); +} + +#ifdef CONFIG_MODULES +static void module_load_ei_list(struct module *mod) +{ + if (!mod->num_ei_funcs) + return; + + populate_error_injection_list(mod->ei_funcs, + mod->ei_funcs + mod->num_ei_funcs, mod); +} + +static void module_unload_ei_list(struct module *mod) +{ + struct ei_entry *ent, *n; + + if (!mod->num_ei_funcs) + return; + + mutex_lock(&ei_mutex); + list_for_each_entry_safe(ent, n, &error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&ei_mutex); +} + +/* Module notifier call back, checking error injection table on the module */ +static 
int ei_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_COMING) + module_load_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_ei_list(mod); + + return NOTIFY_DONE; +} + +static struct notifier_block ei_module_nb = { + .notifier_call = ei_module_callback, + .priority = 0 +}; + +static __init int module_ei_init(void) +{ + return register_module_notifier(&ei_module_nb); +} +#else /* !CONFIG_MODULES */ +#define module_ei_init() (0) +#endif + +/* + * error_injection/whitelist -- shows which functions can be overridden for + * error injection. + */ +static void *ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ei_mutex); + return seq_list_start(&error_injection_list, *pos); +} + +static void ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&ei_mutex); +} + +static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &error_injection_list, pos); +} + +static const char *error_type_string(int etype) +{ + switch (etype) { + case EI_ETYPE_NULL: + return "NULL"; + case EI_ETYPE_ERRNO: + return "ERRNO"; + case EI_ETYPE_ERRNO_NULL: + return "ERRNO_NULL"; + default: + return "(unknown)"; + } +} + +static int ei_seq_show(struct seq_file *m, void *v) +{ + struct ei_entry *ent = list_entry(v, struct ei_entry, list); + + seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + error_type_string(ent->etype)); + return 0; +} + +static const struct seq_operations ei_seq_ops = { + .start = ei_seq_start, + .next = ei_seq_next, + .stop = ei_seq_stop, + .show = ei_seq_show, +}; + +static int ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &ei_seq_ops); +} + +static const struct file_operations debugfs_ei_ops = { + .open = ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init ei_debugfs_init(void) +{ + struct dentry *dir, *file; + + dir = debugfs_create_dir("error_injection", NULL); + if (!dir) + return -ENOMEM; + + file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + + return 0; +} + +static int __init init_error_injection(void) +{ + populate_kernel_ei_list(); + + if (!module_ei_init()) + ei_debugfs_init(); + + return 0; +} +late_initcall(init_error_injection); diff --git a/lib/errseq.c b/lib/errseq.c index 79cc66897db4..df782418b333 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -46,14 +46,14 @@ * @eseq: errseq_t field that should be set * @err: error to set (must be between -1 and -MAX_ERRNO) * - * This function sets the error in *eseq, and increments the sequence counter + * This function sets the error in @eseq, and increments the sequence counter * if the last sequence was sampled at some point in the past. * * Any error set will always overwrite an existing error. * - * We do return the latest value here, primarily for debugging purposes. The - * return value should not be used as a previously sampled value in later calls - * as it will not have the SEEN flag set. + * Return: The previous value, primarily for debugging purposes. The + * return value should not be used as a previously sampled value in later + * calls as it will not have the SEEN flag set. 
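/*
 * Illustrative sketch, not part of the patch: how a fault injector (for
 * example a kprobe-based one) might consult the whitelist built in
 * lib/error-inject.c above before overriding a function's return value.
 * Functions normally land on the list via the ALLOW_ERROR_INJECTION()
 * annotation; "target_addr" is a hypothetical resolved symbol address, and
 * the header is assumed to provide the EI_ETYPE_* constants.
 */
#include <linux/error-injection.h>

static bool example_may_override(unsigned long target_addr)
{
	if (!within_error_injection_list(target_addr))
		return false;		/* not whitelisted: never override */

	/* only override functions declared to return an errno */
	return get_injectable_error_type(target_addr) == EI_ETYPE_ERRNO;
}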
*/ errseq_t errseq_set(errseq_t *eseq, int err) { @@ -108,11 +108,13 @@ errseq_t errseq_set(errseq_t *eseq, int err) EXPORT_SYMBOL(errseq_set); /** - * errseq_sample - grab current errseq_t value - * @eseq: pointer to errseq_t to be sampled + * errseq_sample() - Grab current errseq_t value. + * @eseq: Pointer to errseq_t to be sampled. * * This function allows callers to sample an errseq_t value, marking it as * "seen" if required. + * + * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { @@ -134,15 +136,15 @@ errseq_t errseq_sample(errseq_t *eseq) EXPORT_SYMBOL(errseq_sample); /** - * errseq_check - has an error occurred since a particular sample point? - * @eseq: pointer to errseq_t value to be checked - * @since: previously-sampled errseq_t from which to check + * errseq_check() - Has an error occurred since a particular sample point? + * @eseq: Pointer to errseq_t value to be checked. + * @since: Previously-sampled errseq_t from which to check. * - * Grab the value that eseq points to, and see if it has changed "since" - * the given value was sampled. The "since" value is not advanced, so there + * Grab the value that eseq points to, and see if it has changed @since + * the given value was sampled. The @since value is not advanced, so there * is no need to mark the value as seen. * - * Returns the latest error set in the errseq_t or 0 if it hasn't changed. + * Return: The latest error set in the errseq_t or 0 if it hasn't changed. */ int errseq_check(errseq_t *eseq, errseq_t since) { @@ -155,11 +157,11 @@ int errseq_check(errseq_t *eseq, errseq_t since) EXPORT_SYMBOL(errseq_check); /** - * errseq_check_and_advance - check an errseq_t and advance to current value - * @eseq: pointer to value being checked and reported - * @since: pointer to previously-sampled errseq_t to check against and advance + * errseq_check_and_advance() - Check an errseq_t and advance to current value. + * @eseq: Pointer to value being checked and reported. + * @since: Pointer to previously-sampled errseq_t to check against and advance. * - * Grab the eseq value, and see whether it matches the value that "since" + * Grab the eseq value, and see whether it matches the value that @since * points to. If it does, then just return 0. * * If it doesn't, then the value has changed. Set the "seen" flag, and try to @@ -170,6 +172,9 @@ EXPORT_SYMBOL(errseq_check); * value. The caller must provide that if necessary. Because of this, callers * may want to do a lockless errseq_check before taking the lock and calling * this. + * + * Return: Negative errno if one has been stored, or 0 if no new error has + * occurred. */ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) { diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380c..ee3df93ba69a 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -21,22 +21,29 @@ #include <linux/export.h> #include <linux/kernel.h> -#if !defined(find_next_bit) || !defined(find_next_zero_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. 
+ * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -47,7 +54,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -61,7 +71,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit); #endif @@ -70,11 +80,21 @@ EXPORT_SYMBOL(find_next_bit); unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif +#if !defined(find_next_and_bit) +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_and_bit); +#endif + #ifndef find_first_bit /* * Find the first set bit in a memory region. @@ -146,15 +166,19 @@ static inline unsigned long ext2_swab(const unsigned long y) } #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) -static unsigned long _find_next_bit_le(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit_le(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. 
*/ tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); @@ -165,7 +189,10 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(ext2_swab(tmp)), nbits); @@ -176,7 +203,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, ~0UL); + return _find_next_bit_le(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -185,7 +212,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, 0UL); + return _find_next_bit_le(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif diff --git a/lib/test_find_bit.c b/lib/find_bit_benchmark.c index f4394a36f9aa..5985a25e6cbc 100644 --- a/lib/test_find_bit.c +++ b/lib/find_bit_benchmark.c @@ -35,6 +35,7 @@ #define SPARSE 500 static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; +static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata; /* * This is Schlemiel the Painter's algorithm. It should be called after @@ -43,16 +44,15 @@ static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; static int __init test_find_first_bit(void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < len; cnt++) { i = find_first_bit(bitmap, len); __clear_bit(i, bitmap); } - cycles = get_cycles() - cycles; - pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_first_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -60,14 +60,13 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -75,14 +74,13 @@ static int __init test_find_next_bit(const void *bitmap, unsigned long len) static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_zero_bit(bitmap, len, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -90,9 +88,9 @@ static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) static int __init test_find_last_bit(const void *bitmap, unsigned long len) { unsigned long l, cnt = 0; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); do { cnt++; l = find_last_bit(bitmap, len); @@ -100,9 +98,24 @@ static int __init 
test_find_last_bit(const void *bitmap, unsigned long len) break; len = l; } while (len); + time = ktime_get() - time; + pr_err("find_last_bit: %18llu ns, %6ld iterations\n", time, cnt); + + return 0; +} + +static int __init test_find_next_and_bit(const void *bitmap, + const void *bitmap2, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_and_bit(bitmap, bitmap2, BITMAP_LEN, i+1); cycles = get_cycles() - cycles; - pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + pr_err("find_next_and_bit:\t\t%llu cycles, %ld iterations\n", + (u64)cycles, cnt); return 0; } @@ -114,31 +127,36 @@ static int __init find_bit_test(void) pr_err("\nStart testing find_bit() with random-filled bitmap\n"); get_random_bytes(bitmap, sizeof(bitmap)); + get_random_bytes(bitmap2, sizeof(bitmap2)); test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); bitmap_zero(bitmap, BITMAP_LEN); + bitmap_zero(bitmap2, BITMAP_LEN); - while (nbits--) + while (nbits--) { __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + __set_bit(prandom_u32() % BITMAP_LEN, bitmap2); + } test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); - return 0; + /* + * Everything is OK. Return error just to let user run benchmark + * again without annoying rmmod. + */ + return -EINVAL; } module_init(find_bit_test); -static void __exit test_find_bit_cleanup(void) -{ -} -module_exit(test_find_bit_cleanup); - MODULE_LICENSE("GPL"); diff --git a/lib/idr.c b/lib/idr.c index 2593ce513a18..823b813f08f8 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -1,4 +1,5 @@ #include <linux/bitmap.h> +#include <linux/bug.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/slab.h> @@ -7,71 +8,184 @@ DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); static DEFINE_SPINLOCK(simple_ida_lock); -int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, - unsigned long start, unsigned long end, gfp_t gfp, - bool ext) +/** + * idr_alloc_u32() - Allocate an ID. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @nextid: Pointer to an ID. + * @max: The maximum ID to allocate (inclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @max. + * Note that @max is inclusive whereas the @end parameter to idr_alloc() + * is exclusive. The new ID is assigned to @nextid before the pointer + * is inserted into the IDR, so if @nextid points into the object pointed + * to by @ptr, a concurrent lookup will not find an uninitialised ID. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. If an error occurred, + * @nextid is unchanged. 
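/*
 * Illustrative sketch, not part of the patch: iterating over the bits set in
 * both of two bitmaps with the find_next_and_bit() helper added in
 * lib/find_bit.c above (assuming the generic declaration reachable through
 * <linux/bitops.h>). "mask_a", "mask_b" and "nbits" are hypothetical.
 */
#include <linux/bitops.h>
#include <linux/printk.h>

static void example_walk_common_bits(const unsigned long *mask_a,
				     const unsigned long *mask_b,
				     unsigned long nbits)
{
	unsigned long bit;

	for (bit = find_next_and_bit(mask_a, mask_b, nbits, 0);
	     bit < nbits;
	     bit = find_next_and_bit(mask_a, mask_b, nbits, bit + 1))
		pr_info("bit %lu set in both bitmaps\n", bit);
}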
+ */ +int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, + unsigned long max, gfp_t gfp) { struct radix_tree_iter iter; void __rcu **slot; + unsigned int base = idr->idr_base; + unsigned int id = *nextid; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; + if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) + idr->idr_rt.gfp_mask |= IDR_RT_MARKER; - radix_tree_iter_init(&iter, start); - if (ext) - slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end); - else - slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); + id = (id < base) ? 0 : id - base; + radix_tree_iter_init(&iter, id); + slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); + *nextid = iter.index + base; + /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); - if (index) - *index = iter.index; return 0; } -EXPORT_SYMBOL_GPL(idr_alloc_cmn); +EXPORT_SYMBOL_GPL(idr_alloc_u32); /** - * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion - * @idr: idr handle - * @ptr: pointer to be associated with the new id - * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive) - * @gfp: memory allocation flags - * - * Allocates an ID larger than the last ID allocated if one is available. - * If not, it will attempt to allocate the smallest ID that is larger or - * equal to @start. + * idr_alloc() - Allocate an ID. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @start and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. */ -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - int id, curr = idr->idr_next; + u32 id = start; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + + ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); + if (ret) + return ret; + + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); - if (curr < start) - curr = start; +/** + * idr_alloc_cyclic() - Allocate an ID cyclically. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * The search for an unused ID will start at the last ID allocated and will + * wrap around to @start if no free IDs are found before reaching @end. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. 
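/*
 * Illustrative sketch, not part of the patch: allocating an ID with
 * idr_alloc_u32() while storing the ID inside the object itself, the case
 * the kernel-doc above calls out (@nextid is written before the pointer
 * becomes visible). "struct example_obj" and "example_idr" are hypothetical,
 * and the caller is assumed to hold a suitable lock.
 */
#include <linux/idr.h>

struct example_obj {
	u32 id;
	/* ... payload ... */
};

static DEFINE_IDR(example_idr);

static int example_install(struct example_obj *obj)
{
	obj->id = 0;	/* start searching from the lowest free ID */
	return idr_alloc_u32(&example_idr, obj, &obj->id, UINT_MAX, GFP_KERNEL);
}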
Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. + */ +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +{ + u32 id = idr->idr_next; + int err, max = end > 0 ? end - 1 : INT_MAX; - id = idr_alloc(idr, ptr, curr, end, gfp); - if ((id == -ENOSPC) && (curr > start)) - id = idr_alloc(idr, ptr, start, curr, gfp); + if ((int)id < start) + id = start; - if (id >= 0) - idr->idr_next = id + 1U; + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + if ((err == -ENOSPC) && (id > start)) { + id = start; + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + } + if (err) + return err; + idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); /** - * idr_for_each - iterate through all stored pointers - * @idr: idr handle - * @fn: function to be called for each pointer - * @data: data passed to callback function + * idr_remove() - Remove an ID from the IDR. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Removes this ID from the IDR. If the ID was not previously in the IDR, + * this function returns %NULL. + * + * Since this function modifies the IDR, the caller should provide their + * own locking to ensure that concurrent modification of the same IDR is + * not possible. + * + * Return: The pointer formerly associated with this ID. + */ +void *idr_remove(struct idr *idr, unsigned long id) +{ + return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); +} +EXPORT_SYMBOL_GPL(idr_remove); + +/** + * idr_find() - Return pointer for given ID. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Looks up the pointer associated with this ID. A %NULL pointer may + * indicate that @id is not allocated or that the %NULL pointer was + * associated with this ID. + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + * + * Return: The pointer associated with this ID. + */ +void *idr_find(const struct idr *idr, unsigned long id) +{ + return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); +} +EXPORT_SYMBOL_GPL(idr_find); + +/** + * idr_for_each() - Iterate through all stored pointers. + * @idr: IDR handle. + * @fn: Function to be called for each pointer. + * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing - * the id, the pointer and the data pointer passed to this function. + * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. @@ -86,9 +200,15 @@ int idr_for_each(const struct idr *idr, { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { - int ret = fn(iter.index, rcu_dereference_raw(*slot), data); + int ret; + unsigned long id = iter.index + base; + + if (WARN_ON_ONCE(id > INT_MAX)) + break; + ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -98,9 +218,9 @@ int idr_for_each(const struct idr *idr, EXPORT_SYMBOL(idr_for_each); /** - * idr_get_next - Find next populated entry - * @idr: idr handle - * @nextid: Pointer to lowest possible ID to return + * idr_get_next() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. 
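/*
 * Illustrative sketch, not part of the patch: looking an object up under the
 * RCU read lock with idr_find() and tearing it down with idr_remove(), as
 * permitted by the locking rules described above. "example_idr" and
 * "struct example_obj" are the hypothetical names from the previous sketch.
 */
#include <linux/idr.h>
#include <linux/rcupdate.h>

static struct example_obj *example_lookup(unsigned long id)
{
	struct example_obj *obj;

	rcu_read_lock();
	obj = idr_find(&example_idr, id);
	/* a real user would take a reference here before dropping the lock */
	rcu_read_unlock();

	return obj;
}

static void example_uninstall(struct example_obj *obj)
{
	/* caller provides the write-side locking */
	idr_remove(&example_idr, obj->id);
}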
* * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated @@ -111,35 +231,55 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; + id = iter.index + base; + + if (WARN_ON_ONCE(id > INT_MAX)) + return NULL; - *nextid = iter.index; + *nextid = id; return rcu_dereference_raw(*slot); } EXPORT_SYMBOL(idr_get_next); -void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) +/** + * idr_get_next_ul() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. + * + * Returns the next populated entry in the tree with an ID greater than + * or equal to the value pointed to by @nextid. On exit, @nextid is updated + * to the ID of the found value. To use in a loop, the value pointed to by + * nextid must be incremented by the user. + */ +void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; - *nextid = iter.index; + *nextid = iter.index + base; return rcu_dereference_raw(*slot); } -EXPORT_SYMBOL(idr_get_next_ext); +EXPORT_SYMBOL(idr_get_next_ul); /** - * idr_replace - replace pointer for given id - * @idr: idr handle - * @ptr: New pointer to associate with the ID - * @id: Lookup key + * idr_replace() - replace pointer for given ID. + * @idr: IDR handle. + * @ptr: New pointer to associate with the ID. + * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with @@ -147,18 +287,9 @@ EXPORT_SYMBOL(idr_get_next_ext); * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not - * found. %-EINVAL indicates that @id or @ptr were not valid. + * found. %-EINVAL indicates that @ptr was not valid. */ -void *idr_replace(struct idr *idr, void *ptr, int id) -{ - if (id < 0) - return ERR_PTR(-EINVAL); - - return idr_replace_ext(idr, ptr, id); -} -EXPORT_SYMBOL(idr_replace); - -void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) +void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; @@ -166,6 +297,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return ERR_PTR(-EINVAL); + id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) @@ -175,7 +307,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) return entry; } -EXPORT_SYMBOL(idr_replace_ext); +EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description @@ -235,7 +367,7 @@ EXPORT_SYMBOL(idr_replace_ext); * bitmap, which is excessive. 
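/*
 * Illustrative sketch, not part of the patch: the conventional calling
 * pattern for the IDA interface shown in this file - pre-allocate with
 * ida_pre_get(), then allocate under the caller's lock and retry on -EAGAIN.
 * "example_ida" and "example_ida_lock" are hypothetical.
 */
#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDA(example_ida);
static DEFINE_SPINLOCK(example_ida_lock);

static int example_get_id(void)
{
	int id, err;

again:
	if (!ida_pre_get(&example_ida, GFP_KERNEL))
		return -ENOMEM;

	spin_lock(&example_ida_lock);
	err = ida_get_new_above(&example_ida, 0, &id);
	spin_unlock(&example_ida_lock);

	if (err == -EAGAIN)
		goto again;	/* pre-allocated memory was consumed, retry */
	if (err)
		return err;

	return id;
}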
*/ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) +#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) /** * ida_get_new_above - allocate new ID above or equal to a start id @@ -300,7 +432,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; rcu_assign_pointer(*slot, bitmap); } @@ -333,7 +464,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index e2d329099bf7..14436f4ca6bd 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -38,3 +38,33 @@ unsigned long int_sqrt(unsigned long x) return y; } EXPORT_SYMBOL(int_sqrt); + +#if BITS_PER_LONG < 64 +/** + * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input + * is expected. + * @x: 64bit integer of which to calculate the sqrt + */ +u32 int_sqrt64(u64 x) +{ + u64 b, m, y = 0; + + if (x <= ULONG_MAX) + return int_sqrt((unsigned long) x); + + m = 1ULL << (fls64(x) & ~1ULL); + while (m != 0) { + b = y + m; + y >>= 1; + + if (x >= b) { + x -= b; + y += m; + } + m >>= 2; + } + + return y; +} +EXPORT_SYMBOL(int_sqrt64); +#endif diff --git a/lib/ioremap.c b/lib/ioremap.c index b808a390e4c3..54e5bbaa3200 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && - IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PMD_SIZE) && + pmd_free_pte_page(pmd)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && - IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + IS_ALIGNED(phys_addr + addr, PUD_SIZE) && + pud_free_pmd_page(pud)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } diff --git a/lib/kfifo.c b/lib/kfifo.c index 90ba1eb1df06..b0f757bf7213 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -39,7 +39,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, size_t esize, gfp_t gfp_mask) { /* - * round down to the next power of 2, since our 'let the indices + * round up to the next power of 2, since our 'let the indices * wrap' technique works only in this case. */ size = roundup_pow_of_two(size); diff --git a/lib/kobject.c b/lib/kobject.c index 763d70a18941..18989b5b3b56 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * @@ -5,9 +6,6 @@ * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2007 Novell Inc. * - * This file is released under the GPLv2. - * - * * Please see the file Documentation/kobject.txt for critical information * about using the kobject interface. 
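/*
 * Illustrative sketch, not part of the patch: the usual way a sysfs
 * directory is created and torn down with the helpers in this file. The
 * "example" directory name and the init/exit wrappers are hypothetical.
 */
#include <linux/init.h>
#include <linux/kobject.h>

static struct kobject *example_kobj;

static int __init example_kobj_init(void)
{
	/* creates /sys/example (a NULL parent lands at the top of sysfs) */
	example_kobj = kobject_create_and_add("example", NULL);
	if (!example_kobj)
		return -ENOMEM;
	return 0;
}

static void __exit example_kobj_exit(void)
{
	kobject_put(example_kobj);	/* drops the last reference, calls release */
}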
*/ @@ -206,8 +204,9 @@ static int kobject_add_internal(struct kobject *kobj) return -ENOENT; if (!kobj->name || !kobj->name[0]) { - WARN(1, "kobject: (%p): attempted to be registered with empty " - "name!\n", kobj); + WARN(1, + "kobject: (%p): attempted to be registered with empty name!\n", + kobj); return -EINVAL; } @@ -234,14 +233,12 @@ static int kobject_add_internal(struct kobject *kobj) /* be noisy on error issues */ if (error == -EEXIST) - WARN(1, "%s failed for %s with " - "-EEXIST, don't try to register things with " - "the same name in the same directory.\n", - __func__, kobject_name(kobj)); + pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", + __func__, kobject_name(kobj)); else - WARN(1, "%s failed for %s (error: %d parent: %s)\n", - __func__, kobject_name(kobj), error, - parent ? kobject_name(parent) : "'none'"); + pr_err("%s failed for %s (error: %d parent: %s)\n", + __func__, kobject_name(kobj), error, + parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; @@ -336,8 +333,8 @@ void kobject_init(struct kobject *kobj, struct kobj_type *ktype) } if (kobj->state_initialized) { /* do not error out as sometimes we can recover */ - printk(KERN_ERR "kobject (%p): tried to init an initialized " - "object, something is seriously wrong.\n", kobj); + pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n", + kobj); dump_stack(); } @@ -346,7 +343,7 @@ void kobject_init(struct kobject *kobj, struct kobj_type *ktype) return; error: - printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str); + pr_err("kobject (%p): %s\n", kobj, err_str); dump_stack(); } EXPORT_SYMBOL(kobject_init); @@ -359,7 +356,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, retval = kobject_set_name_vargs(kobj, fmt, vargs); if (retval) { - printk(KERN_ERR "kobject: can not set name properly!\n"); + pr_err("kobject: can not set name properly!\n"); return retval; } kobj->parent = parent; @@ -401,8 +398,7 @@ int kobject_add(struct kobject *kobj, struct kobject *parent, return -EINVAL; if (!kobj->state_initialized) { - printk(KERN_ERR "kobject '%s' (%p): tried to add an " - "uninitialized object, something is seriously wrong.\n", + pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n", kobject_name(kobj), kobj); dump_stack(); return -EINVAL; @@ -592,9 +588,9 @@ struct kobject *kobject_get(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) - WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " - "initialized, yet kobject_get() is being " - "called.\n", kobject_name(kobj), kobj); + WARN(1, KERN_WARNING + "kobject: '%s' (%p): is not initialized, yet kobject_get() is being called.\n", + kobject_name(kobj), kobj); kref_get(&kobj->kref); } return kobj; @@ -624,8 +620,7 @@ static void kobject_cleanup(struct kobject *kobj) kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) - pr_debug("kobject: '%s' (%p): does not have a release() " - "function, it is broken and must be fixed.\n", + pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed.\n", kobject_name(kobj), kobj); /* send "remove" if the caller did not do it but sent "add" */ @@ -688,9 +683,9 @@ void kobject_put(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) - WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " - "initialized, yet kobject_put() is being " - "called.\n", kobject_name(kobj), kobj); + WARN(1, 
KERN_WARNING + "kobject: '%s' (%p): is not initialized, yet kobject_put() is being called.\n", + kobject_name(kobj), kobj); kref_put(&kobj->kref, kobject_release); } } @@ -754,8 +749,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) retval = kobject_add(kobj, parent, "%s", name); if (retval) { - printk(KERN_WARNING "%s: kobject_add error: %d\n", - __func__, retval); + pr_warn("%s: kobject_add error: %d\n", __func__, retval); kobject_put(kobj); kobj = NULL; } @@ -1039,6 +1033,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) return ns; } +EXPORT_SYMBOL_GPL(kobj_ns_grab_current); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) { @@ -1074,3 +1069,4 @@ void kobj_ns_drop(enum kobj_ns_type type, void *ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } +EXPORT_SYMBOL_GPL(kobj_ns_drop); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 2615074d3de5..15ea216a67ce 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kernel userspace event delivery * @@ -5,8 +6,6 @@ * Copyright (C) 2004 Novell, Inc. All rights reserved. * Copyright (C) 2004 IBM, Inc. All rights reserved. * - * Licensed under the GNU GPL v2. - * * Authors: * Robert Love <rml@novell.com> * Kay Sievers <kay.sievers@vrfy.org> @@ -26,6 +25,7 @@ #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> +#include <net/netlink.h> #include <net/net_namespace.h> @@ -33,11 +33,13 @@ u64 uevent_seqnum; #ifdef CONFIG_UEVENT_HELPER char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; #endif -#ifdef CONFIG_NET + struct uevent_sock { struct list_head list; struct sock *sk; }; + +#ifdef CONFIG_NET static LIST_HEAD(uevent_sock_list); #endif @@ -603,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) EXPORT_SYMBOL_GPL(add_uevent_var); #if defined(CONFIG_NET) +static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb, + struct netlink_ext_ack *extack) +{ + /* u64 to chars: 2^64 - 1 = 21 chars */ + char buf[sizeof("SEQNUM=") + 21]; + struct sk_buff *skbc; + int ret; + + /* bump and prepare sequence number */ + ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum); + if (ret < 0 || (size_t)ret >= sizeof(buf)) + return -ENOMEM; + ret++; + + /* verify message does not overflow */ + if ((skb->len + ret) > UEVENT_BUFFER_SIZE) { + NL_SET_ERR_MSG(extack, "uevent message too big"); + return -EINVAL; + } + + /* copy skb and extend to accommodate sequence number */ + skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL); + if (!skbc) + return -ENOMEM; + + /* append sequence number */ + skb_put_data(skbc, buf, ret); + + /* remove msg header */ + skb_pull(skbc, NLMSG_HDRLEN); + + /* set portid 0 to inform userspace message comes from kernel */ + NETLINK_CB(skbc).portid = 0; + NETLINK_CB(skbc).dst_group = 1; + + ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL); + /* ENOBUFS should be handled in userspace */ + if (ret == -ENOBUFS || ret == -ESRCH) + ret = 0; + + return ret; +} + +static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net; + int ret; + + if (!nlmsg_data(nlh)) + return -EINVAL; + + /* + * Verify that we are allowed to send messages to the target + * network namespace. The caller must have CAP_SYS_ADMIN in the + * owning user namespace of the target network namespace. 
+ */ + net = sock_net(NETLINK_CB(skb).sk); + if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) { + NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability"); + return -EPERM; + } + + mutex_lock(&uevent_sock_mutex); + ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack); + mutex_unlock(&uevent_sock_mutex); + + return ret; +} + +static void uevent_net_rcv(struct sk_buff *skb) +{ + netlink_rcv_skb(skb, &uevent_net_rcv_skb); +} + static int uevent_net_init(struct net *net) { struct uevent_sock *ue_sk; struct netlink_kernel_cfg cfg = { .groups = 1, - .flags = NL_CFG_F_NONROOT_RECV, + .input = uevent_net_rcv, + .flags = NL_CFG_F_NONROOT_RECV }; ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); @@ -622,6 +700,9 @@ static int uevent_net_init(struct net *net) kfree(ue_sk); return -ENODEV; } + + net->uevent_sock = ue_sk; + mutex_lock(&uevent_sock_mutex); list_add_tail(&ue_sk->list, &uevent_sock_list); mutex_unlock(&uevent_sock_mutex); @@ -630,17 +711,9 @@ static int uevent_net_init(struct net *net) static void uevent_net_exit(struct net *net) { - struct uevent_sock *ue_sk; + struct uevent_sock *ue_sk = net->uevent_sock; mutex_lock(&uevent_sock_mutex); - list_for_each_entry(ue_sk, &uevent_sock_list, list) { - if (sock_net(ue_sk->sk) == net) - goto found; - } - mutex_unlock(&uevent_sock_mutex); - return; - -found: list_del(&ue_sk->list); mutex_unlock(&uevent_sock_mutex); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 9f79547d1b97..f0a2934605bf 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -71,6 +71,12 @@ static void __exit libcrc32c_mod_fini(void) crypto_free_shash(tfm); } +const char *crc32c_impl(void) +{ + return crypto_shash_driver_name(tfm); +} +EXPORT_SYMBOL(crc32c_impl); + module_init(libcrc32c_mod_init); module_exit(libcrc32c_mod_fini); diff --git a/lib/list_debug.c b/lib/list_debug.c index a34db8d27667..5d5424b51b74 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { if (CHECK_DATA_CORRUPTION(next->prev != prev, - "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", + "list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n", prev, next->prev, next) || CHECK_DATA_CORRUPTION(prev->next != next, - "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", + "list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n", next, prev->next, prev) || CHECK_DATA_CORRUPTION(new == prev || new == next, - "list_add double add: new=%p, prev=%p, next=%p.\n", + "list_add double add: new=%px, prev=%px, next=%px.\n", new, prev, next)) return false; @@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry) next = entry->next; if (CHECK_DATA_CORRUPTION(next == LIST_POISON1, - "list_del corruption, %p->next is LIST_POISON1 (%p)\n", + "list_del corruption, %px->next is LIST_POISON1 (%px)\n", entry, LIST_POISON1) || CHECK_DATA_CORRUPTION(prev == LIST_POISON2, - "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", + "list_del corruption, %px->prev is LIST_POISON2 (%px)\n", entry, LIST_POISON2) || CHECK_DATA_CORRUPTION(prev->next != entry, - "list_del corruption. prev->next should be %p, but was %p\n", + "list_del corruption. prev->next should be %px, but was %px\n", entry, prev->next) || CHECK_DATA_CORRUPTION(next->prev != entry, - "list_del corruption. next->prev should be %p, but was %p\n", + "list_del corruption. 
next->prev should be %px, but was %px\n", entry, next->prev)) return false; diff --git a/lib/lockref.c b/lib/lockref.c index 47169ed7e964..3d468b53d4c9 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -81,6 +81,34 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** + * lockref_put_not_zero - Decrements count unless count <= 1 before decrement + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count would become zero + */ +int lockref_put_not_zero(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count--; + if (old.count <= 1) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if (lockref->count > 1) { + lockref->count--; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_put_not_zero); + +/** * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero diff --git a/lib/logic_pio.c b/lib/logic_pio.c new file mode 100644 index 000000000000..feea48fd1a0d --- /dev/null +++ b/lib/logic_pio.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2017 HiSilicon Limited, All Rights Reserved. + * Author: Gabriele Paoloni <gabriele.paoloni@huawei.com> + * Author: Zhichang Yuan <yuanzhichang@hisilicon.com> + */ + +#define pr_fmt(fmt) "LOGIC PIO: " fmt + +#include <linux/of.h> +#include <linux/io.h> +#include <linux/logic_pio.h> +#include <linux/mm.h> +#include <linux/rculist.h> +#include <linux/sizes.h> +#include <linux/slab.h> + +/* The unique hardware address list */ +static LIST_HEAD(io_range_list); +static DEFINE_MUTEX(io_range_mutex); + +/* Consider a kernel general helper for this */ +#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len)) + +/** + * logic_pio_register_range - register logical PIO range for a host + * @new_range: pointer to the IO range to be registered. + * + * Returns 0 on success, the error code in case of failure. + * + * Register a new IO range node in the IO range list. 
+ */ +int logic_pio_register_range(struct logic_pio_hwaddr *new_range) +{ + struct logic_pio_hwaddr *range; + resource_size_t start; + resource_size_t end; + resource_size_t mmio_sz = 0; + resource_size_t iio_sz = MMIO_UPPER_LIMIT; + int ret = 0; + + if (!new_range || !new_range->fwnode || !new_range->size) + return -EINVAL; + + start = new_range->hw_start; + end = new_range->hw_start + new_range->size; + + mutex_lock(&io_range_mutex); + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == new_range->fwnode) { + /* range already there */ + goto end_register; + } + if (range->flags == LOGIC_PIO_CPU_MMIO && + new_range->flags == LOGIC_PIO_CPU_MMIO) { + /* for MMIO ranges we need to check for overlap */ + if (start >= range->hw_start + range->size || + end < range->hw_start) { + mmio_sz += range->size; + } else { + ret = -EFAULT; + goto end_register; + } + } else if (range->flags == LOGIC_PIO_INDIRECT && + new_range->flags == LOGIC_PIO_INDIRECT) { + iio_sz += range->size; + } + } + + /* range not registered yet, check for available space */ + if (new_range->flags == LOGIC_PIO_CPU_MMIO) { + if (mmio_sz + new_range->size - 1 > MMIO_UPPER_LIMIT) { + /* if it's too big check if 64K space can be reserved */ + if (mmio_sz + SZ_64K - 1 > MMIO_UPPER_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->size = SZ_64K; + pr_warn("Requested IO range too big, new size set to 64K\n"); + } + new_range->io_start = mmio_sz; + } else if (new_range->flags == LOGIC_PIO_INDIRECT) { + if (iio_sz + new_range->size - 1 > IO_SPACE_LIMIT) { + ret = -E2BIG; + goto end_register; + } + new_range->io_start = iio_sz; + } else { + /* invalid flag */ + ret = -EINVAL; + goto end_register; + } + + list_add_tail_rcu(&new_range->list, &io_range_list); + +end_register: + mutex_unlock(&io_range_mutex); + return ret; +} + +/** + * find_io_range_by_fwnode - find logical PIO range for given FW node + * @fwnode: FW node handle associated with logical PIO range + * + * Returns pointer to node on success, NULL otherwise. + * + * Traverse the io_range_list to find the registered node for @fwnode. + */ +struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->fwnode == fwnode) + return range; + } + return NULL; +} + +/* Return a registered range given an input PIO token */ +static struct logic_pio_hwaddr *find_io_range(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (in_range(pio, range->io_start, range->size)) + return range; + } + pr_err("PIO entry token %lx invalid\n", pio); + return NULL; +} + +/** + * logic_pio_to_hwaddr - translate logical PIO to HW address + * @pio: logical PIO value + * + * Returns HW address if valid, ~0 otherwise. + * + * Translate the input logical PIO to the corresponding hardware address. + * The input PIO should be unique in the whole logical PIO space. 
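/*
 * Illustrative sketch, not part of the patch: how a host controller driver
 * might register a non-MMIO (indirect) I/O window so its bus addresses get a
 * slot in the shared logical PIO space. The fwnode, bus base and window size
 * are hypothetical; the host's ".ops"/".hostdata" accessor callbacks are
 * omitted for brevity but are required before the logic_in/out helpers can
 * reach the device. Real callers usually allocate the range dynamically so
 * it outlives this function, as list_add_tail_rcu() keeps a reference.
 */
#include <linux/logic_pio.h>
#include <linux/printk.h>

static int example_register_indirect_io(struct fwnode_handle *fwnode)
{
	static struct logic_pio_hwaddr range;
	int ret;

	range.fwnode   = fwnode;
	range.hw_start = 0;		/* host-local bus address of the window */
	range.size     = 0x400;		/* 1K of indirect I/O ports */
	range.flags    = LOGIC_PIO_INDIRECT;

	ret = logic_pio_register_range(&range);
	if (ret)
		return ret;

	/* range.io_start now holds the assigned logical PIO base */
	pr_info("indirect I/O mapped at logical PIO %#lx\n",
		(unsigned long)range.io_start);
	return 0;
}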
+ */ +resource_size_t logic_pio_to_hwaddr(unsigned long pio) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range(pio); + if (range) + return range->hw_start + pio - range->io_start; + + return (resource_size_t)~0; +} + +/** + * logic_pio_trans_hwaddr - translate HW address to logical PIO + * @fwnode: FW node reference for the host + * @addr: Host-relative HW address + * @size: size to translate + * + * Returns Logical PIO value if successful, ~0UL otherwise + */ +unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, + resource_size_t addr, resource_size_t size) +{ + struct logic_pio_hwaddr *range; + + range = find_io_range_by_fwnode(fwnode); + if (!range || range->flags == LOGIC_PIO_CPU_MMIO) { + pr_err("IO range not found or invalid\n"); + return ~0UL; + } + if (range->size < size) { + pr_err("resource size %pa cannot fit in IO range size %pa\n", + &size, &range->size); + return ~0UL; + } + return addr - range->hw_start + range->io_start; +} + +unsigned long logic_pio_trans_cpuaddr(resource_size_t addr) +{ + struct logic_pio_hwaddr *range; + + list_for_each_entry_rcu(range, &io_range_list, list) { + if (range->flags != LOGIC_PIO_CPU_MMIO) + continue; + if (in_range(addr, range->hw_start, range->size)) + return addr - range->hw_start + range->io_start; + } + pr_err("addr %llx not registered in io_range_list\n", + (unsigned long long) addr); + return ~0UL; +} + +#if defined(CONFIG_INDIRECT_PIO) && defined(PCI_IOBASE) +#define BUILD_LOGIC_IO(bw, type) \ +type logic_in##bw(unsigned long addr) \ +{ \ + type ret = (type)~0; \ + \ + if (addr < MMIO_UPPER_LIMIT) { \ + ret = read##bw(PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + ret = entry->ops->in(entry->hostdata, \ + addr, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ + return ret; \ +} \ + \ +void logic_out##bw(type value, unsigned long addr) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + write##bw(value, PCI_IOBASE + addr); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->out(entry->hostdata, \ + addr, value, sizeof(type)); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} \ + \ +void logic_ins##bw(unsigned long addr, void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + reads##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->ins(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ + \ +} \ + \ +void logic_outs##bw(unsigned long addr, const void *buffer, \ + unsigned int count) \ +{ \ + if (addr < MMIO_UPPER_LIMIT) { \ + writes##bw(PCI_IOBASE + addr, buffer, count); \ + } else if (addr >= MMIO_UPPER_LIMIT && addr < IO_SPACE_LIMIT) { \ + struct logic_pio_hwaddr *entry = find_io_range(addr); \ + \ + if (entry && entry->ops) \ + entry->ops->outs(entry->hostdata, \ + addr, buffer, sizeof(type), count); \ + else \ + WARN_ON_ONCE(1); \ + } \ +} + +BUILD_LOGIC_IO(b, u8) +EXPORT_SYMBOL(logic_inb); +EXPORT_SYMBOL(logic_insb); +EXPORT_SYMBOL(logic_outb); +EXPORT_SYMBOL(logic_outsb); + +BUILD_LOGIC_IO(w, u16) +EXPORT_SYMBOL(logic_inw); +EXPORT_SYMBOL(logic_insw); +EXPORT_SYMBOL(logic_outw); +EXPORT_SYMBOL(logic_outsw); + +BUILD_LOGIC_IO(l, u32) 
+EXPORT_SYMBOL(logic_inl); +EXPORT_SYMBOL(logic_insl); +EXPORT_SYMBOL(logic_outl); +EXPORT_SYMBOL(logic_outsl); + +#endif /* CONFIG_INDIRECT_PIO && PCI_IOBASE */ diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index c10fba461454..2d3eb1cb73b8 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement the default iomap interfaces * diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe03c6d52761..9f96fa7bc000 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -197,10 +197,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* - * Restore per-cpu operation. smp_store_release() is paired with - * smp_read_barrier_depends() in __ref_is_percpu() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC clearing. + * Restore per-cpu operation. smp_store_release() is paired + * with READ_ONCE() in __ref_is_percpu() and guarantees that the + * zeroing is visible to all percpu accesses which can see the + * following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; @@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu); * This function normally doesn't block and can be called from any context * but it may block if @confirm_kill is specified and @ref is in the * process of switching to atomic mode by percpu_ref_switch_to_atomic(). + * + * There are no implied RCU grace periods between kill and release. */ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c8d55565fafa..da9e10c827df 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -24,6 +24,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/export.h> @@ -145,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { - return root->gfp_mask & __GFP_BITS_MASK; + return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, @@ -2124,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) preempt_enable(); if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) @@ -2135,7 +2136,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) } EXPORT_SYMBOL(ida_pre_get); -void __rcu **idr_get_free_cmn(struct radix_tree_root *root, +void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) { @@ -2284,6 +2285,7 @@ void __init radix_tree_init(void) int ret; BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); + BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index f01b1cb04f91..3de0d8921286 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -4,3 +4,4 @@ int*.c tables.c neon?.c s390vx?.c +vpermxor*.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 4add700ddfe3..2f8b61dfd9b0 100644 --- 
a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,9 +5,9 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o -raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o -raid6_pq-$(CONFIG_TILEGX) += tilegx8.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o hostprogs-y += mktables @@ -91,6 +91,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_vpermxor1.o += $(altivec_flags) +targets += vpermxor1.c +$(obj)/vpermxor1.c: UNROLL := 1 +$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor2.o += $(altivec_flags) +targets += vpermxor2.c +$(obj)/vpermxor2.c: UNROLL := 2 +$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor4.o += $(altivec_flags) +targets += vpermxor4.c +$(obj)/vpermxor4.c: UNROLL := 4 +$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor8.o += $(altivec_flags) +targets += vpermxor8.c +$(obj)/vpermxor8.c: UNROLL := 8 +$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + CFLAGS_neon1.o += $(NEON_FLAGS) targets += neon1.c $(obj)/neon1.c: UNROLL := 1 @@ -115,11 +139,6 @@ $(obj)/neon8.c: UNROLL := 8 $(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) -targets += tilegx8.c -$(obj)/tilegx8.c: UNROLL := 8 -$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE - $(call if_changed,unroll) - targets += s390vx8.c $(obj)/s390vx8.c: UNROLL := 8 $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 476994723258..5065b1e7e327 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -74,9 +74,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec2, &raid6_altivec4, &raid6_altivec8, -#endif -#if defined(CONFIG_TILEGX) - &raid6_tilegx8, + &raid6_vpermxor1, + &raid6_vpermxor2, + &raid6_vpermxor4, + &raid6_vpermxor8, #endif #if defined(CONFIG_S390) &raid6_s390vx8, diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 682aae8a1fef..d20ed0d11411 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -24,10 +24,13 @@ #include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + #include <altivec.h> #ifdef __KERNEL__ # include <asm/cputable.h> # include <asm/switch_to.h> +#endif /* __KERNEL__ */ /* * This is the C data type to use. 
We use a vector of diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 1d2276b007ee..8191e1d0d2fb 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) static void raid6_sse21_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } - static void raid6_sse22_xor_syndrome(int disks, int start, int stop, +static void raid6_sse22_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) asm volatile("sfence" : : : "memory"); kernel_fpu_end(); - } +} const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, @@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } - static void raid6_sse24_xor_syndrome(int disks, int start, int stop, +static void raid6_sse24_xor_syndrome(int disks, int start, int stop, size_t bytes, void **ptrs) - { +{ u8 **dptr = (u8 **)ptrs; u8 *p, *q; int d, z, z0; @@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) } asm volatile("sfence" : : : "memory"); kernel_fpu_end(); - } +} const struct raid6_calls raid6_sse2x4 = { diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index be1010bdc435..5d73f5cb4d8a 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -45,15 +45,14 @@ else ifeq ($(HAS_NEON),yes) CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ - gcc -c -x c - >&/dev/null && \ - rm ./-.o && echo yes) + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o + CFLAGS += -I../../../arch/powerpc/include + CFLAGS += -DCONFIG_ALTIVEC + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o endif endif -ifeq ($(ARCH),tilegx) -OBJS += tilegx8.o -endif .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -98,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk altivec8.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ +vpermxor1.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@ + +vpermxor2.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@ + +vpermxor4.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@ + +vpermxor8.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@ + int1.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < int.uc > $@ @@ -116,15 +127,11 @@ int16.c: int.uc ../unroll.awk int32.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=32 < int.uc > $@ -tilegx8.c: tilegx.uc ../unroll.awk - $(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@ - tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test - rm -f tilegx*.c + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test spotless: clean rm -f *~ diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc deleted file mode 100644 index 2dd291a11264..000000000000 --- a/lib/raid6/tilegx.uc +++ /dev/null @@ -1,87 +0,0 @@ -/* -*- linux-c -*- 
------------------------------------------------------- * - * - * Copyright 2002 H. Peter Anvin - All Rights Reserved - * Copyright 2012 Tilera Corporation - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, - * Boston MA 02111-1307, USA; either version 2 of the License, or - * (at your option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * tilegx$#.c - * - * $#-way unrolled TILE-Gx SIMD for RAID-6 math. - * - * This file is postprocessed using unroll.awk. - * - */ - -#include <linux/raid/pq.h> - -/* Create 8 byte copies of constant byte */ -# define NBYTES(x) (__insn_v1addi(0, x)) -# define NSIZE 8 - -/* - * The SHLBYTE() operation shifts each byte left by 1, *not* - * rolling over into the next byte - */ -static inline __attribute_const__ u64 SHLBYTE(u64 v) -{ - /* Vector One Byte Shift Left Immediate. */ - return __insn_v1shli(v, 1); -} - -/* - * The MASK() operation returns 0xFF in any byte for which the high - * bit is 1, 0x00 for any byte for which the high bit is 0. - */ -static inline __attribute_const__ u64 MASK(u64 v) -{ - /* Vector One Byte Shift Right Signed Immediate. */ - return __insn_v1shrsi(v, 7); -} - - -void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) -{ - u8 **dptr = (u8 **)ptrs; - u64 *p, *q; - int d, z, z0; - - u64 wd$$, wq$$, wp$$, w1$$, w2$$; - u64 x1d = NBYTES(0x1d); - u64 * z0ptr; - - z0 = disks - 3; /* Highest data disk */ - p = (u64 *)dptr[z0+1]; /* XOR parity */ - q = (u64 *)dptr[z0+2]; /* RS syndrome */ - - z0ptr = (u64 *)&dptr[z0][0]; - for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { - wq$$ = wp$$ = *z0ptr++; - for ( z = z0-1 ; z >= 0 ; z-- ) { - wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE]; - wp$$ = wp$$ ^ wd$$; - w2$$ = MASK(wq$$); - w1$$ = SHLBYTE(wq$$); - w2$$ = w2$$ & x1d; - w1$$ = w1$$ ^ w2$$; - wq$$ = w1$$ ^ wd$$; - } - *p++ = wp$$; - *q++ = wq$$; - } -} - -const struct raid6_calls raid6_tilegx$# = { - raid6_tilegx$#_gen_syndrome, - NULL, /* XOR not yet implemented */ - NULL, - "tilegx$#", - 0 -}; diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc new file mode 100644 index 000000000000..10475dc423c1 --- /dev/null +++ b/lib/raid6/vpermxor.uc @@ -0,0 +1,105 @@ +/* + * Copyright 2017, Matt Brown, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * vpermxor$#.c + * + * Based on H. Peter Anvin's paper - The mathematics of RAID-6 + * + * $#-way unrolled portable integer math RAID-6 instruction set + * This file is postprocessed using unroll.awk + * + * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q + * syndrome calculations. + * This can be run on systems which have both Altivec and vpermxor instruction. + * + * This instruction was introduced in POWER8 - ISA v2.07. 
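Both the deleted tilegx implementation and the new vpermxor one compute the RAID-6 Q syndrome the same way: the running syndrome is multiplied by two in GF(2^8) and the next data block is XOR-ed in, with SHLBYTE()/MASK() and the vpermxor nibble tables being vectorized forms of that multiply. A scalar sketch of the per-byte step (illustration only, not part of this patch):

    #include <linux/types.h>

    /* multiply one byte by x in GF(2^8) with the RAID-6 polynomial 0x11d */
    static inline u8 gf_mul2(u8 b)
    {
            return (b << 1) ^ ((b & 0x80) ? 0x1d : 0x00);
    }

    /* one Q-syndrome update step in scalar form: wq = gf_mul2(wq) ^ wd; */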
+ */ + +#include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +#include <asm/cputable.h> +#include <asm/ppc-opcode.h> +#include <asm/switch_to.h> +#endif + +typedef vector unsigned char unative_t; +#define NSIZE sizeof(unative_t) + +static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, + 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, + 0x06, 0x04, 0x02,0x00}; +static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, + 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80, + 0x60, 0x40, 0x20, 0x00}; + +static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes, + void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + unative_t wp$$, wq$$, wd$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for (d = 0; d < bytes; d += NSIZE*$#) { + wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + + for (z = z0-1; z>=0; z--) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + /* P syndrome */ + wp$$ = vec_xor(wp$$, wd$$); + + /* Q syndrome */ + asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$)); + wq$$ = vec_xor(wq$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs); + + disable_kernel_altivec(); + preempt_enable(); +} + +int raid6_have_altivec_vpermxor(void); +#if $# == 1 +int raid6_have_altivec_vpermxor(void) +{ + /* Check if arch has both altivec and the vpermxor instructions */ +# ifdef __KERNEL__ + return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) && + cpu_has_feature(CPU_FTR_ARCH_207S)); +# else + return 1; +#endif + +} +#endif + +const struct raid6_calls raid6_vpermxor$# = { + raid6_vpermxor$#_gen_syndrome, + NULL, + raid6_have_altivec_vpermxor, + "vpermxor$#", + 0 +}; +#endif diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ddd7dde87c3c..2b2b79974b61 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #define ASSERT_RHT_MUTEX(HT) #endif - -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, - gfp_t gfp) -{ - unsigned int i, size; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - - nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); - size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); - - /* Never allocate more than 0.5 locks per bucket */ - size = min_t(unsigned int, size, tbl->size >> 1); - - if (tbl->nest) - size = min(size, 1U << tbl->nest); - - if (sizeof(spinlock_t) != 0) { - if (gfpflags_allow_blocking(gfp)) - tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp); - else - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); - if (!tbl->locks) - return -ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&tbl->locks[i]); - } - tbl->locks_mask = size - 1; - - return 0; -} - static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); @@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - kvfree(tbl->locks); + free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct 
rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size; + size_t size, max_locks; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - if (alloc_bucket_locks(ht, tbl, gfp) < 0) { + max_locks = size >> 1; + if (tbl->nest) + max_locks = min_t(size_t, max_locks, 1U << tbl->nest); + + if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, + ht->p.locks_mul, gfp) < 0) { bucket_table_free(tbl); return NULL; } @@ -364,6 +333,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht) err = rhashtable_rehash_chain(ht, old_hash); if (err) return err; + cond_resched(); } /* Publish the new table pointer. */ @@ -537,8 +507,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, if (!key || (ht->p.obj_cmpfn ? ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } if (!ht->rhlist) return rht_obj(ht, head); @@ -707,6 +679,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) iter->p = NULL; iter->slot = 0; iter->skip = 0; + iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = @@ -732,7 +705,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** - * rhashtable_walk_start - Start a hash table walk + * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take @@ -744,8 +717,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * Returns -EAGAIN if resize event occured. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. + * + * rhashtable_walk_start is defined as an inline variant that returns + * void. This is preferred in cases where the caller would ignore + * resize events and always continue. */ -int rhashtable_walk_start(struct rhashtable_iter *iter) +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; @@ -757,28 +734,26 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl) { + if (!iter->walker.tbl && !iter->end_of_table) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_start); +EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** - * rhashtable_walk_next - Return the next object and advance the iterator - * @iter: Hash table iterator + * __rhashtable_walk_find_next - Find the next element in a table (or the first + * one in case of a new walk). * - * Note that you must call rhashtable_walk_stop when you are finished - * with the walk. + * @iter: Hash table iterator * - * Returns the next object or NULL when the end of the table is reached. + * Returns the found object or NULL when the end of the table is reached. * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may continue to use it. + * Returns -EAGAIN if resize event occurred. 
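The walker rework above turns rhashtable_walk_start() into a void inline and adds rhashtable_walk_start_check() for callers that care about resize events. A minimal iteration sketch (hypothetical caller; my_obj and my_ht are illustrative names, not from this patch):

    #include <linux/err.h>
    #include <linux/rhashtable.h>

    struct my_obj {
            u32 key;
            struct rhash_head node;
    };

    static void my_walk(struct rhashtable *my_ht)
    {
            struct rhashtable_iter iter;
            struct my_obj *obj;

            rhashtable_walk_enter(my_ht, &iter);
            rhashtable_walk_start(&iter);           /* takes the RCU read lock */

            while ((obj = rhashtable_walk_next(&iter)) != NULL) {
                    if (IS_ERR(obj)) {
                            if (PTR_ERR(obj) == -EAGAIN)
                                    continue;       /* table resized; iteration rewound */
                            break;
                    }
                    /* ... inspect obj under RCU ... */
            }

            rhashtable_walk_stop(&iter);            /* drops the RCU read lock */
            rhashtable_walk_exit(&iter);
    }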
*/ -void *rhashtable_walk_next(struct rhashtable_iter *iter) +static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; @@ -786,13 +761,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; - if (p) { - if (!rhlist || !(list = rcu_dereference(list->next))) { - p = rcu_dereference(p->next); - list = container_of(p, struct rhlist_head, rhead); - } - goto next; - } + if (!tbl) + return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; @@ -836,13 +806,90 @@ next: iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); + } else { + iter->end_of_table = true; } return NULL; } + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; + + if (p) { + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); + } + + /* At the end of this slot, switch to next one and then find + * next entry from that point. + */ + iter->skip = 0; + iter->slot++; + } + + return __rhashtable_walk_find_next(iter); +} EXPORT_SYMBOL_GPL(rhashtable_walk_next); /** + * rhashtable_walk_peek - Return the next object but don't advance the iterator + * @iter: Hash table iterator + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_peek(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + + if (p) + return rht_obj(ht, ht->rhlist ? &list->rhead : p); + + /* No object found in current iter, find next one in the table. */ + + if (iter->skip) { + /* A nonzero skip value points to the next entry in the table + * beyond that last one that was found. Decrement skip so + * we find the current value. __rhashtable_walk_find_next + * will restore the original value of skip assuming that + * the table hasn't changed. + */ + iter->skip--; + } + + return __rhashtable_walk_find_next(iter); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_peek); + +/** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator * @@ -1066,6 +1113,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; + cond_resched(); for (pos = rht_dereference(*rht_bucket(tbl, i), ht), next = !rht_is_a_nulls(pos) ? 
rht_dereference(pos->next, ht) : NULL; diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 80aa8d5463fa..e6a9c06ec70c 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -100,7 +100,7 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return -1; } - if (!test_and_set_bit(nr, word)) + if (!test_and_set_bit_lock(nr, word)) break; hint = nr + 1; @@ -434,9 +434,9 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) /* * Pairs with the memory barrier in set_current_state() to ensure the * proper ordering of clear_bit()/waitqueue_active() in the waker and - * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See - * the comment on waitqueue_active(). This is __after_atomic because we - * just did clear_bit() in the caller. + * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the + * waiter. See the comment on waitqueue_active(). This is __after_atomic + * because we just did clear_bit_unlock() in the caller. */ smp_mb__after_atomic(); @@ -462,14 +462,14 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) */ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); - wake_up(&ws->wait); + wake_up_nr(&ws->wait, wake_batch); } } void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { - sbitmap_clear_bit(&sbq->sb, nr); + sbitmap_clear_bit_unlock(&sbq->sb, nr); sbq_wake_up(sbq); if (likely(!sbq->round_robin && nr < sbq->sb.depth)) *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c1c55f7daaa..06dad7a072fd 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last); void sg_init_table(struct scatterlist *sgl, unsigned int nents) { memset(sgl, 0, sizeof(*sgl) * nents); -#ifdef CONFIG_DEBUG_SG - { - unsigned int i; - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; - } -#endif - sg_mark_end(&sgl[nents - 1]); + sg_init_marker(sgl, nents); } EXPORT_SYMBOL(sg_init_table); @@ -474,6 +467,133 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. 
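The sgl_alloc()/sgl_free() family documented here (built only when CONFIG_SGL_ALLOC is selected) allocates the scatterlist together with the pages that back it and frees both in one call. A hypothetical caller (helper names are illustrative, not from this patch):

    #include <linux/gfp.h>
    #include <linux/scatterlist.h>

    static struct scatterlist *my_alloc_bounce(size_t len, unsigned int *nents)
    {
            /* order-0 pages, no extra element reserved for chaining */
            return sgl_alloc(len, GFP_KERNEL, nents);
    }

    static void my_free_bounce(struct scatterlist *sgl)
    {
            sgl_free(sgl);  /* frees the pages and the scatterlist array */
    }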
+ */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nalloc); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ONCE(length, "length = %lld\n", length); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_n_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @nents: Maximum number of elements to free + * @order: Second argument for __free_pages() + * + * Notes: + * - If several scatterlists have been chained and each chain element is + * freed separately then it's essential to set nents correctly to avoid that a + * page would get freed twice. + * - All pages in a chained scatterlist can be freed at once by setting @nents + * to a high number. + */ +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) +{ + struct scatterlist *sg; + struct page *page; + int i; + + for_each_sg(sgl, sg, nents, i) { + if (!sg) + break; + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_n_order); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + sgl_free_n_order(sgl, INT_MAX, order); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) diff --git a/lib/sha256.c b/lib/sha256.c new file mode 100644 index 000000000000..4400c832e2aa --- /dev/null +++ b/lib/sha256.c @@ -0,0 +1,283 @@ +/* + * SHA-256, as specified in + * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf + * + * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>. 
+ * + * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com> + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2014 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/bitops.h> +#include <linux/sha256.h> +#include <linux/string.h> +#include <asm/byteorder.h> + +static inline u32 Ch(u32 x, u32 y, u32 z) +{ + return z ^ (x & (y ^ z)); +} + +static inline u32 Maj(u32 x, u32 y, u32 z) +{ + return (x & y) | (z & (x | y)); +} + +#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22)) +#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25)) +#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3)) +#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10)) + +static inline void LOAD_OP(int I, u32 *W, const u8 *input) +{ + W[I] = __be32_to_cpu(((__be32 *)(input))[I]); +} + +static inline void BLEND_OP(int I, u32 *W) +{ + W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; +} + +static void sha256_transform(u32 *state, const u8 *input) +{ + u32 a, b, c, d, e, f, g, h, t1, t2; + u32 W[64]; + int i; + + /* load the input */ + for (i = 0; i < 16; i++) + LOAD_OP(i, W, input); + + /* now blend */ + for (i = 16; i < 64; i++) + BLEND_OP(i, W); + + /* load the state into our registers */ + a = state[0]; b = state[1]; c = state[2]; d = state[3]; + e = state[4]; f = state[5]; g = state[6]; h = state[7]; + + /* now iterate */ + t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) 
+ 0x0fc19dc6 + W[18]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = 
c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56]; + t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2; + t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57]; + t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2; + t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58]; + t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2; + t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59]; + t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2; + t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60]; + t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2; + t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61]; + t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2; + t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62]; + t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2; + t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63]; + t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2; + + state[0] += a; state[1] += b; state[2] += c; state[3] += d; + state[4] += e; state[5] += f; state[6] += g; state[7] += h; + + /* clear any sensitive info... */ + a = b = c = d = e = f = g = h = t1 = t2 = 0; + memset(W, 0, 64 * sizeof(u32)); +} + +int sha256_init(struct sha256_state *sctx) +{ + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len) +{ + unsigned int partial, done; + const u8 *src; + + partial = sctx->count & 0x3f; + sctx->count += len; + done = 0; + src = data; + + if ((partial + len) > 63) { + if (partial) { + done = -partial; + memcpy(sctx->buf + partial, data, done + 64); + src = sctx->buf; + } + + do { + sha256_transform(sctx->state, src); + done += 64; + src = data + done; + } while (done + 63 < len); + + partial = 0; + } + memcpy(sctx->buf + partial, src, len - done); + + return 0; +} + +int sha256_final(struct sha256_state *sctx, u8 *out) +{ + __be32 *dst = (__be32 *)out; + __be64 bits; + unsigned int index, pad_len; + int i; + static const u8 padding[64] = { 0x80, }; + + /* Save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64. */ + index = sctx->count & 0x3f; + pad_len = (index < 56) ? (56 - index) : ((64+56) - index); + sha256_update(sctx, padding, pad_len); + + /* Append length (before padding) */ + sha256_update(sctx, (const u8 *)&bits, sizeof(bits)); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Zeroize sensitive information. */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 835cc6df2776..85925aaa4fff 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,7 +5,6 @@ * DEBUG_PREEMPT variant of smp_processor_id(). 
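The new lib/sha256.c exposes a plain three-call interface. A minimal one-shot wrapper (hypothetical, assuming the declarations live in <linux/sha256.h> as included above):

    #include <linux/sha256.h>
    #include <linux/types.h>

    static void my_sha256(const u8 *data, unsigned int len, u8 digest[32])
    {
            struct sha256_state sctx;

            sha256_init(&sctx);
            sha256_update(&sctx, data, len);
            sha256_final(&sctx, digest);    /* writes 32 bytes and wipes sctx */
    }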
*/ #include <linux/export.h> -#include <linux/kallsyms.h> #include <linux/sched.h> notrace static unsigned int check_preemption_disabled(const char *what1, @@ -43,7 +42,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", what1, what2, preempt_count() - 1, current->comm, current->pid); - print_symbol("caller is %s\n", (long)__builtin_return_address(0)); + printk("caller is %pS\n", __builtin_return_address(0)); dump_stack(); out_enable: diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e9672..e513459a5601 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -163,6 +163,21 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } +/* Use our own, non-instrumented version of memcmp(). + * + * We actually don't care about the order, just the equality. + */ +static inline +int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, + unsigned int n) +{ + for ( ; n-- ; u1++, u2++) { + if (*u1 != *u2) + return 1; + } + return 0; +} + /* Find a stack that is equal to the one stored in entries in the hash */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, @@ -173,10 +188,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, for (found = bucket; found; found = found->next) { if (found->hash == hash && found->size == size && - !memcmp(entries, found->entries, - size * sizeof(unsigned long))) { + !stackdepot_memcmp(entries, found->entries, size)) return found; - } } return NULL; } diff --git a/lib/string.c b/lib/string.c index 64a9e33f1daa..2c0900a5d51a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -203,7 +203,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; - c = *(unsigned long *)(src+res); + c = read_word_at_a_time(src+res); if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cea19aaf303c..fece57566d45 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -18,7 +18,7 @@ */ #include <linux/cache.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/spinlock.h> @@ -31,6 +31,7 @@ #include <linux/gfp.h> #include <linux/scatterlist.h> #include <linux/mem_encrypt.h> +#include <linux/set_memory.h> #include <asm/io.h> #include <asm/dma.h> @@ -156,22 +157,6 @@ unsigned long swiotlb_size_or_default(void) return size ? 
size : (IO_TLB_DEFAULT_SIZE); } -void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { } - -/* For swiotlb, clear memory encryption mask from dma addresses */ -static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev, - phys_addr_t address) -{ - return __sme_clr(phys_to_dma(hwdev, address)); -} - -/* Note that this doesn't work with highmem page */ -static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, - volatile void *address) -{ - return phys_to_dma(hwdev, virt_to_phys(address)); -} - static bool no_iotlb_memory; void swiotlb_print_info(void) @@ -209,12 +194,12 @@ void __init swiotlb_update_mem_attributes(void) vaddr = phys_to_virt(io_tlb_start); bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); vaddr = phys_to_virt(io_tlb_overflow_buffer); bytes = PAGE_ALIGN(io_tlb_overflow); - swiotlb_set_mem_attributes(vaddr, bytes); + set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); } @@ -355,7 +340,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_start = virt_to_phys(tlb); io_tlb_end = io_tlb_start + bytes; - swiotlb_set_mem_attributes(tlb, bytes); + set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); memset(tlb, 0, bytes); /* @@ -366,7 +351,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) if (!v_overflow_buffer) goto cleanup2; - swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow); + set_memory_decrypted((unsigned long)v_overflow_buffer, + io_tlb_overflow >> PAGE_SHIFT); memset(v_overflow_buffer, 0, io_tlb_overflow); io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); @@ -417,7 +403,7 @@ cleanup2: return -ENOMEM; } -void __init swiotlb_free(void) +void __init swiotlb_exit(void) { if (!io_tlb_orig_addr) return; @@ -586,7 +572,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - if (printk_ratelimit()) + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: @@ -605,7 +591,6 @@ found: return tlb_addr; } -EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); /* * Allocates bounce buffer and returns its kernel virtual address. 
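With the DMA_ATTR_NO_WARN check added to swiotlb_tbl_map_single() above, a driver that has its own fallback path can ask for a quiet mapping attempt. A hypothetical example (helper name is illustrative, not from this patch):

    #include <linux/dma-mapping.h>

    /* map a buffer without the ratelimited "swiotlb buffer is full" warning;
     * the caller checks dma_mapping_error() and falls back itself. */
    static dma_addr_t my_quiet_map(struct device *dev, void *buf, size_t size)
    {
            return dma_map_single_attrs(dev, buf, size, DMA_TO_DEVICE,
                                        DMA_ATTR_NO_WARN);
    }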
@@ -623,7 +608,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size, return SWIOTLB_MAP_ERROR; } - start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start); + start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -675,7 +660,6 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, } spin_unlock_irqrestore(&io_tlb_lock, flags); } -EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, @@ -707,94 +691,80 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, BUG(); } } -EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); -void * -swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) +#ifdef CONFIG_DMA_DIRECT_OPS +static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, + size_t size) { - dma_addr_t dev_addr; - void *ret; - int order = get_order(size); - u64 dma_mask = DMA_BIT_MASK(32); - - if (hwdev && hwdev->coherent_dma_mask) - dma_mask = hwdev->coherent_dma_mask; - - ret = (void *)__get_free_pages(flags, order); - if (ret) { - dev_addr = swiotlb_virt_to_bus(hwdev, ret); - if (dev_addr + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ - free_pages((unsigned long) ret, order); - ret = NULL; - } - } - if (!ret) { - /* - * We are either out of memory or the device can't DMA to - * GFP_DMA memory; fall back on map_single(), which - * will grab memory from the lowest available address range. - */ - phys_addr_t paddr = map_single(hwdev, 0, size, - DMA_FROM_DEVICE, 0); - if (paddr == SWIOTLB_MAP_ERROR) - goto err_warn; + u64 mask = DMA_BIT_MASK(32); - ret = phys_to_virt(paddr); - dev_addr = swiotlb_phys_to_dma(hwdev, paddr); + if (dev && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + return addr + size - 1 <= mask; +} - /* Confirm address can be DMA'd by device */ - if (dev_addr + size - 1 > dma_mask) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); +static void * +swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, + unsigned long attrs) +{ + phys_addr_t phys_addr; - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * The DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(hwdev, paddr, - size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - goto err_warn; - } - } + if (swiotlb_force == SWIOTLB_NO_FORCE) + goto out_warn; + + phys_addr = swiotlb_tbl_map_single(dev, + __phys_to_dma(dev, io_tlb_start), + 0, size, DMA_FROM_DEVICE, 0); + if (phys_addr == SWIOTLB_MAP_ERROR) + goto out_warn; - *dma_handle = dev_addr; - memset(ret, 0, size); + *dma_handle = __phys_to_dma(dev, phys_addr); + if (!dma_coherent_ok(dev, *dma_handle, size)) + goto out_unmap; - return ret; + memset(phys_to_virt(phys_addr), 0, size); + return phys_to_virt(phys_addr); -err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", - dev_name(hwdev), size); - dump_stack(); +out_unmap: + dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)(dev ? dev->coherent_dma_mask : 0), + (unsigned long long)*dma_handle); + /* + * DMA_TO_DEVICE to avoid memcpy in unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. 
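The dma_coherent_ok() helper introduced above is a pure range check against the device's coherent DMA mask: addr + size - 1 <= mask. As a worked example (illustration only), with a 32-bit mask a buffer whose bus address is 0xfffff000 and whose size is 0x2000 is rejected, since 0xfffff000 + 0x2000 - 1 = 0x100000fff exceeds 0xffffffff, while the same buffer at 0xffffd000 passes (0xffffefff <= 0xffffffff).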
+ */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); +out_warn: + if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { + dev_warn(dev, + "swiotlb: coherent allocation failed, size=%zu\n", + size); + dump_stack(); + } return NULL; } -EXPORT_SYMBOL(swiotlb_alloc_coherent); -void -swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, - dma_addr_t dev_addr) +static bool swiotlb_free_buffer(struct device *dev, size_t size, + dma_addr_t dma_addr) { - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(paddr)) - free_pages((unsigned long)vaddr, get_order(size)); - else - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + WARN_ON_ONCE(irqs_disabled()); + + if (!is_swiotlb_buffer(phys_addr)) + return false; + + /* + * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + return true; } -EXPORT_SYMBOL(swiotlb_free_coherent); +#endif static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, @@ -854,10 +824,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } - dev_addr = swiotlb_phys_to_dma(dev, map); + dev_addr = __phys_to_dma(dev, map); /* Ensure that the address returned is DMA'ble */ if (dma_capable(dev, dev_addr, size)) @@ -866,9 +836,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); + return __phys_to_dma(dev, io_tlb_overflow_buffer); } -EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -909,7 +878,6 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, { unmap_single(hwdev, dev_addr, size, dir, attrs); } -EXPORT_SYMBOL_GPL(swiotlb_unmap_page); /* * Make physical memory consistent for a single streaming mode DMA translation @@ -947,7 +915,6 @@ swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, @@ -955,7 +922,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. @@ -1000,14 +966,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sg_dma_len(sgl) = 0; return 0; } - sg->dma_address = swiotlb_phys_to_dma(hwdev, map); + sg->dma_address = __phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; sg_dma_len(sg) = sg->length; } return nelems; } -EXPORT_SYMBOL(swiotlb_map_sg_attrs); /* * Unmap a set of streaming mode DMA translations. 
Again, cpu read rules @@ -1027,7 +992,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); } -EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); /* * Make physical memory consistent for a set of streaming mode DMA translations @@ -1055,7 +1019,6 @@ swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, @@ -1063,14 +1026,12 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_device); int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); + return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); } -EXPORT_SYMBOL(swiotlb_dma_mapping_error); /* * Return whether the given device DMA address mask can be supported @@ -1081,6 +1042,51 @@ EXPORT_SYMBOL(swiotlb_dma_mapping_error); int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; + return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -EXPORT_SYMBOL(swiotlb_dma_supported); + +#ifdef CONFIG_DMA_DIRECT_OPS +void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + void *vaddr; + + /* temporary workaround: */ + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + + /* + * Don't print a warning when the first allocation attempt fails. + * swiotlb_alloc_coherent() will print a warning when the DMA memory + * allocation ultimately failed. 
+ */ + gfp |= __GFP_NOWARN; + + vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + if (!vaddr) + vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); + return vaddr; +} + +void swiotlb_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) +{ + if (!swiotlb_free_buffer(dev, size, dma_addr)) + dma_direct_free(dev, size, vaddr, dma_addr, attrs); +} + +const struct dma_map_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc = swiotlb_alloc, + .free = swiotlb_free, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .dma_supported = dma_direct_supported, +}; +#endif /* CONFIG_DMA_DIRECT_OPS */ diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index aa1f2669bdd5..de16f7869fb1 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -23,7 +23,7 @@ __check_eq_uint(const char *srcfile, unsigned int line, const unsigned int exp_uint, unsigned int x) { if (exp_uint != x) { - pr_warn("[%s:%u] expected %u, got %u\n", + pr_err("[%s:%u] expected %u, got %u\n", srcfile, line, exp_uint, x); return false; } @@ -33,19 +33,13 @@ __check_eq_uint(const char *srcfile, unsigned int line, static bool __init __check_eq_bitmap(const char *srcfile, unsigned int line, - const unsigned long *exp_bmap, unsigned int exp_nbits, - const unsigned long *bmap, unsigned int nbits) + const unsigned long *exp_bmap, const unsigned long *bmap, + unsigned int nbits) { - if (exp_nbits != nbits) { - pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", - srcfile, line, exp_nbits, nbits); - return false; - } - if (!bitmap_equal(exp_bmap, bmap, nbits)) { pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", srcfile, line, - exp_nbits, exp_bmap, nbits, bmap); + nbits, exp_bmap, nbits, bmap); return false; } return true; @@ -69,6 +63,10 @@ __check_eq_pbl(const char *srcfile, unsigned int line, static bool __init __check_eq_u32_array(const char *srcfile, unsigned int line, const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) __used; +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, const u32 *arr, unsigned int len) { if (exp_len != len) { @@ -107,7 +105,65 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, #define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) #define expect_eq_u32_array(...) 
__expect_eq(u32_array, ##__VA_ARGS__) -static void __init test_zero_fill_copy(void) +static void __init test_zero_clear(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to set all bits */ + memset(bmap, 0xff, 128); + + expect_eq_pbl("0-22", bmap, 23); + expect_eq_pbl("0-1023", bmap, 1024); + + /* single-word bitmaps */ + bitmap_clear(bmap, 0, 9); + expect_eq_pbl("9-1023", bmap, 1024); + + bitmap_zero(bmap, 35); + expect_eq_pbl("64-1023", bmap, 1024); + + /* cross boundaries operations */ + bitmap_clear(bmap, 79, 19); + expect_eq_pbl("64-78,98-1023", bmap, 1024); + + bitmap_zero(bmap, 115); + expect_eq_pbl("128-1023", bmap, 1024); + + /* Zeroing entire area */ + bitmap_zero(bmap, 1024); + expect_eq_pbl("", bmap, 1024); +} + +static void __init test_fill_set(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to clear all bits */ + memset(bmap, 0x00, 128); + + expect_eq_pbl("", bmap, 23); + expect_eq_pbl("", bmap, 1024); + + /* single-word bitmaps */ + bitmap_set(bmap, 0, 9); + expect_eq_pbl("0-8", bmap, 1024); + + bitmap_fill(bmap, 35); + expect_eq_pbl("0-63", bmap, 1024); + + /* cross boundaries operations */ + bitmap_set(bmap, 79, 19); + expect_eq_pbl("0-63,79-97", bmap, 1024); + + bitmap_fill(bmap, 115); + expect_eq_pbl("0-127", bmap, 1024); + + /* Zeroing entire area */ + bitmap_fill(bmap, 1024); + expect_eq_pbl("0-1023", bmap, 1024); +} + +static void __init test_copy(void) { DECLARE_BITMAP(bmap1, 1024); DECLARE_BITMAP(bmap2, 1024); @@ -116,36 +172,20 @@ static void __init test_zero_fill_copy(void) bitmap_zero(bmap2, 1024); /* single-word bitmaps */ - expect_eq_pbl("", bmap1, 23); - - bitmap_fill(bmap1, 19); - expect_eq_pbl("0-18", bmap1, 1024); - + bitmap_set(bmap1, 0, 19); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_fill(bmap2, 23); - expect_eq_pbl("0-22", bmap2, 1024); - + bitmap_set(bmap2, 0, 23); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_zero(bmap1, 23); - expect_eq_pbl("", bmap1, 1024); - /* multi-word bitmaps */ - bitmap_zero(bmap1, 1024); - expect_eq_pbl("", bmap1, 1024); - - bitmap_fill(bmap1, 109); - expect_eq_pbl("0-108", bmap1, 1024); - + bitmap_set(bmap1, 0, 109); bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); bitmap_fill(bmap2, 1024); - expect_eq_pbl("0-1023", bmap2, 1024); - bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); @@ -160,9 +200,6 @@ static void __init test_zero_fill_copy(void) bitmap_fill(bmap2, 1024); bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ expect_eq_pbl("0-108,128-1023", bmap2, 1024); - - bitmap_zero(bmap2, 97); /* ... 
but 0-padded til word length */ - expect_eq_pbl("128-1023", bmap2, 1024); } #define PARSE_TIME 0x1 @@ -218,6 +255,10 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { {-EINVAL, "-1", NULL, 8, 0}, {-EINVAL, "-0", NULL, 8, 0}, {-EINVAL, "10-1", NULL, 8, 0}, + {-EINVAL, "0-31:", NULL, 8, 0}, + {-EINVAL, "0-31:0", NULL, 8, 0}, + {-EINVAL, "0-31:0/0", NULL, 8, 0}, + {-EINVAL, "0-31:1/0", NULL, 8, 0}, {-EINVAL, "0-31:10/1", NULL, 8, 0}, }; @@ -255,171 +296,31 @@ static void __init test_bitmap_parselist(void) } } -static void __init test_bitmap_u32_array_conversions(void) +#define EXP_BYTES (sizeof(exp) * 8) + +static void __init test_bitmap_arr32(void) { - DECLARE_BITMAP(bmap1, 1024); - DECLARE_BITMAP(bmap2, 1024); - u32 exp_arr[32], arr[32]; - unsigned nbits; - - for (nbits = 0 ; nbits < 257 ; ++nbits) { - const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); - unsigned int i, rv; - - bitmap_zero(bmap1, nbits); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); - expect_eq_uint(nbits, rv); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - for (i = 0 ; i < nbits ; ++i) { - /* - * test conversion bitmap -> u32[] - */ - - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - - /* same, with longer array to fill - */ - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, 32, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are all 0s - */ - memset(exp_arr, 0, sizeof(exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - /* - * test conversion u32[] -> bitmap - */ - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are left - * unchanged (all 1s) - */ - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, - exp_arr, used_u32s); - expect_eq_uint(nbits, rv); - - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - /* same, with more bits to fill - */ - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(u32)); - arr[i/32] = (1U<<(i%32)); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); - expect_eq_uint(used_u32s*32, rv); - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are cleared - */ - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - - /* - * test short conversion bitmap -> u32[] (1 - * word too short) - */ - if (used_u32s > 1) { - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, - 1024 - nbits); /* garbage */ - memset(arr, 0xff, sizeof(arr)); - - rv = bitmap_to_u32array(arr, used_u32s - 1, - bmap1, nbits); - expect_eq_uint((used_u32s - 
1)*32, rv); - - /* 1st used u32 words contain expected - * bit set, the remaining words are - * left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, - (used_u32s-1)*sizeof(*exp_arr)); - if ((i/32) < (used_u32s - 1)) - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - } - - /* - * test short conversion u32[] -> bitmap (3 - * bits too short) - */ - if (nbits > 3) { - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(*arr)); - arr[i/32] = (1U<<(i%32)); - - bitmap_zero(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (0-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - - /* do the same with bmap1 initially - * 1-filled - */ - - bitmap_fill(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (1-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - } - } + unsigned int nbits, next_bit; + u32 arr[sizeof(exp) / 4]; + DECLARE_BITMAP(bmap2, EXP_BYTES); + + memset(arr, 0xa5, sizeof(arr)); + + for (nbits = 0; nbits < EXP_BYTES; ++nbits) { + bitmap_to_arr32(arr, exp, nbits); + bitmap_from_arr32(bmap2, arr, nbits); + expect_eq_bitmap(bmap2, exp, nbits); + + next_bit = find_next_bit(bmap2, + round_up(nbits, BITS_PER_LONG), nbits); + if (next_bit < round_up(nbits, BITS_PER_LONG)) + pr_err("bitmap_copy_arr32(nbits == %d:" + " tail is not safely cleared: %d\n", + nbits, next_bit); + + if (nbits < EXP_BYTES - 32) + expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], + 0xa5a5a5a5); } } @@ -453,8 +354,10 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { - test_zero_fill_copy(); - test_bitmap_u32_array_conversions(); + test_zero_clear(); + test_fill_set(); + test_copy(); + test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f369889e521d..8e157806df7a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -24,10 +24,11 @@ #include <linux/if_vlan.h> #include <linux/random.h> #include <linux/highmem.h> +#include <linux/sched.h> /* General test specific settings */ #define MAX_SUBTESTS 3 -#define MAX_TESTRUNS 10000 +#define MAX_TESTRUNS 1000 #define MAX_DATA 128 #define MAX_INSNS 512 #define MAX_K 0xffffFFFF @@ -83,6 +84,7 @@ struct bpf_test { __u32 result; } test[MAX_SUBTESTS]; int (*fill_helper)(struct bpf_test *self); + int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */ __u8 frag_data[MAX_DATA]; int stack_depth; /* for eBPF only, since tests don't call verifier */ }; @@ -2003,10 +2005,14 @@ static struct bpf_test tests[] = { { { 4, 0 }, { 5, 10 } } }, { - "INT: DIV by zero", + /* This one doesn't go through verifier, but is just raw insn + * as opposed to cBPF tests from here. Thus div by 0 tests are + * done in test_verifier in BPF kselftests. 
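The new test_bitmap_arr32() above round-trips through bitmap_to_arr32()/bitmap_from_arr32() and checks that the tail past nbits stays clear. A hypothetical stand-alone use of that API (function name is illustrative):

    #include <linux/bitmap.h>
    #include <linux/kernel.h>

    static void my_arr32_roundtrip(void)
    {
            DECLARE_BITMAP(src, 100);
            DECLARE_BITMAP(dst, 100);
            u32 arr[DIV_ROUND_UP(100, 32)];

            bitmap_zero(src, 100);
            bitmap_set(src, 5, 40);                 /* set bits 5..44 */

            bitmap_to_arr32(arr, src, 100);         /* endian-safe u32 export */
            bitmap_from_arr32(dst, arr, 100);       /* dst now equals src */
    }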
+ */ + "INT: DIV by -1", .u.insns_int = { BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_ALU64_IMM(BPF_MOV, R7, -1), BPF_LD_ABS(BPF_B, 3), BPF_ALU32_REG(BPF_DIV, R0, R7), BPF_EXIT_INSN(), @@ -2022,7 +2028,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: div_k_0", @@ -2032,7 +2040,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: unknown insn", @@ -2043,7 +2053,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: out of range spill/fill", @@ -2053,7 +2065,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "JUMPS + HOLES", @@ -2145,6 +2159,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: LDX + RET X", @@ -2155,6 +2171,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "M[]: alt STX + LDX", @@ -2329,6 +2347,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Passes checker but fails during runtime. */ "LD [SKF_AD_OFF-1]", @@ -5391,6 +5411,7 @@ static struct bpf_test tests[] = { { }, { }, .fill_helper = bpf_fill_maxinsns4, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Very long jump", @@ -5446,10 +5467,15 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86) + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 0, 0xababcbac } }, .fill_helper = bpf_fill_maxinsns11, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", @@ -6109,6 +6135,110 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 42 } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, }, + /* Checking interpreter vs JIT wrt signed extended imms. 
*/ + { + "JNE signed compare, test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_REG(BPF_JNE, R2, R4, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 4", + .u.insns_int = { + BPF_LD_IMM64(R1, -17104896), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 5", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xfefb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 6", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x7efb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 7", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, 2), + }, + CLASSIC | FLAG_NO_DATA, + {}, + { { 0, 2 } }, + }, }; static struct net_device dev; @@ -6236,7 +6366,7 @@ static struct bpf_prog *generate_filter(int which, int *err) *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { - if (*err == -EINVAL) { + if (*err == tests[which].expected_errcode) { pr_cont("PASS\n"); /* Verifier rejected filter as expected. 
*/ *err = 0; @@ -6444,6 +6574,93 @@ static bool exclude_test(int test_id) return test_id < test_range[0] || test_id > test_range[1]; } +static __init struct sk_buff *build_test_skb(void) +{ + u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN; + struct sk_buff *skb[2]; + struct page *page[2]; + int i, data_size = 8; + + for (i = 0; i < 2; i++) { + page[i] = alloc_page(GFP_KERNEL); + if (!page[i]) { + if (i == 0) + goto err_page0; + else + goto err_page1; + } + + /* this will set skb[i]->head_frag */ + skb[i] = dev_alloc_skb(headroom + data_size); + if (!skb[i]) { + if (i == 0) + goto err_skb0; + else + goto err_skb1; + } + + skb_reserve(skb[i], headroom); + skb_put(skb[i], data_size); + skb[i]->protocol = htons(ETH_P_IP); + skb_reset_network_header(skb[i]); + skb_set_mac_header(skb[i], -ETH_HLEN); + + skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64); + // skb_headlen(skb[i]): 8, skb[i]->head_frag = 1 + } + + /* setup shinfo */ + skb_shinfo(skb[0])->gso_size = 1448; + skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb[0])->gso_segs = 0; + skb_shinfo(skb[0])->frag_list = skb[1]; + + /* adjust skb[0]'s len */ + skb[0]->len += skb[1]->len; + skb[0]->data_len += skb[1]->data_len; + skb[0]->truesize += skb[1]->truesize; + + return skb[0]; + +err_skb1: + __free_page(page[1]); +err_page1: + kfree_skb(skb[0]); +err_skb0: + __free_page(page[0]); +err_page0: + return NULL; +} + +static __init int test_skb_segment(void) +{ + netdev_features_t features; + struct sk_buff *skb, *segs; + int ret = -1; + + features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + features |= NETIF_F_RXCSUM; + skb = build_test_skb(); + if (!skb) { + pr_info("%s: failed to build_test_skb", __func__); + goto done; + } + + segs = skb_segment(skb, features); + if (!IS_ERR(segs)) { + kfree_skb_list(segs); + ret = 0; + pr_info("%s: success in skb_segment!", __func__); + } else { + pr_info("%s: failed in skb_segment!", __func__); + } + kfree_skb(skb); +done: + return ret; +} + static __init int test_bpf(void) { int i, err_cnt = 0, pass_cnt = 0; @@ -6453,6 +6670,7 @@ static __init int test_bpf(void) struct bpf_prog *fp; int err; + cond_resched(); if (exclude_test(i)) continue; @@ -6501,9 +6719,11 @@ static int __init test_bpf_init(void) return ret; ret = test_bpf(); - destroy_bpf_tests(); - return ret; + if (ret) + return ret; + + return test_skb_segment(); } static void __exit test_bpf_exit(void) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 64a4c76cba2b..cee000ac54d8 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -21,6 +21,7 @@ #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/kthread.h> +#include <linux/vmalloc.h> #define TEST_FIRMWARE_NAME "test-firmware.bin" #define TEST_FIRMWARE_NUM_REQS 4 @@ -96,7 +97,7 @@ struct test_config { struct device *device); }; -struct test_config *test_fw_config; +static struct test_config *test_fw_config; static ssize_t test_fw_misc_read(struct file *f, char __user *buf, size_t size, loff_t *offset) @@ -359,7 +360,7 @@ static ssize_t config_name_show(struct device *dev, { return config_test_show_str(buf, test_fw_config->name); } -static DEVICE_ATTR(config_name, 0644, config_name_show, config_name_store); +static DEVICE_ATTR_RW(config_name); static ssize_t config_num_requests_store(struct device *dev, struct device_attribute *attr, @@ -371,6 +372,7 @@ static ssize_t config_num_requests_store(struct device *dev, if (test_fw_config->reqs) { pr_err("Must call 
release_all_firmware prior to changing config\n"); rc = -EINVAL; + mutex_unlock(&test_fw_mutex); goto out; } mutex_unlock(&test_fw_mutex); @@ -388,8 +390,7 @@ static ssize_t config_num_requests_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->num_requests); } -static DEVICE_ATTR(config_num_requests, 0644, config_num_requests_show, - config_num_requests_store); +static DEVICE_ATTR_RW(config_num_requests); static ssize_t config_sync_direct_store(struct device *dev, struct device_attribute *attr, @@ -411,8 +412,7 @@ static ssize_t config_sync_direct_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->sync_direct); } -static DEVICE_ATTR(config_sync_direct, 0644, config_sync_direct_show, - config_sync_direct_store); +static DEVICE_ATTR_RW(config_sync_direct); static ssize_t config_send_uevent_store(struct device *dev, struct device_attribute *attr, @@ -428,8 +428,7 @@ static ssize_t config_send_uevent_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->send_uevent); } -static DEVICE_ATTR(config_send_uevent, 0644, config_send_uevent_show, - config_send_uevent_store); +static DEVICE_ATTR_RW(config_send_uevent); static ssize_t config_read_fw_idx_store(struct device *dev, struct device_attribute *attr, @@ -445,8 +444,7 @@ static ssize_t config_read_fw_idx_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->read_fw_idx); } -static DEVICE_ATTR(config_read_fw_idx, 0644, config_read_fw_idx_show, - config_read_fw_idx_store); +static DEVICE_ATTR_RW(config_read_fw_idx); static ssize_t trigger_request_store(struct device *dev, diff --git a/lib/test_kasan.c b/lib/test_kasan.c index ef1a3ac1397e..ec657105edbf 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -94,6 +94,37 @@ static noinline void __init kmalloc_pagealloc_oob_right(void) ptr[size] = 0; kfree(ptr); } + +static noinline void __init kmalloc_pagealloc_uaf(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + ptr[0] = 0; +} + +static noinline void __init kmalloc_pagealloc_invalid_free(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: invalid-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr + 1); +} #endif static noinline void __init kmalloc_large_oob_right(void) @@ -388,7 +419,7 @@ static noinline void __init kasan_stack_oob(void) static noinline void __init ksize_unpoisons_memory(void) { char *ptr; - size_t size = 123, real_size = size; + size_t size = 123, real_size; pr_info("ksize() unpoisons the whole allocated chunk\n"); ptr = kmalloc(size, GFP_KERNEL); @@ -472,6 +503,82 @@ static noinline void __init use_after_scope_test(void) p[1023] = 1; } +static noinline void __init kasan_alloca_oob_left(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array - 1; + + pr_info("out-of-bounds to left on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_alloca_oob_right(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array + i; + + pr_info("out-of-bounds to right on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kmem_cache_double_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = 
kmem_cache_create("test_cache", size, 0, 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("double-free on heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p); + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static noinline void __init kmem_cache_invalid_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, + NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("invalid-free of heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + /* Trigger invalid free, the object doesn't get freed */ + kmem_cache_free(cache, p + 1); + + /* + * Properly free the object to prevent the "Objects remaining in + * test_cache on __kmem_cache_shutdown" BUG failure. + */ + kmem_cache_free(cache, p); + + kmem_cache_destroy(cache); +} + static int __init kmalloc_tests_init(void) { /* @@ -485,6 +592,8 @@ static int __init kmalloc_tests_init(void) kmalloc_node_oob_right(); #ifdef CONFIG_SLUB kmalloc_pagealloc_oob_right(); + kmalloc_pagealloc_uaf(); + kmalloc_pagealloc_invalid_free(); #endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); @@ -502,9 +611,13 @@ static int __init kmalloc_tests_init(void) memcg_accounted_kmem_cache(); kasan_stack_oob(); kasan_global_oob(); + kasan_alloca_oob_left(); + kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + kmem_cache_double_free(); + kmem_cache_invalid_free(); kasan_restore_multi_shot(multishot); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 337f408b4de6..0e5b7a61460b 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -694,8 +694,7 @@ static ssize_t config_test_driver_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_driver); } -static DEVICE_ATTR(config_test_driver, 0644, config_test_driver_show, - config_test_driver_store); +static DEVICE_ATTR_RW(config_test_driver); static ssize_t config_test_fs_store(struct device *dev, struct device_attribute *attr, @@ -726,8 +725,7 @@ static ssize_t config_test_fs_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_fs); } -static DEVICE_ATTR(config_test_fs, 0644, config_test_fs_show, - config_test_fs_store); +static DEVICE_ATTR_RW(config_test_fs); static int trigger_config_run_type(struct kmod_test_device *test_dev, enum kmod_test_case test_case, @@ -1012,8 +1010,7 @@ static ssize_t config_num_threads_show(struct device *dev, return test_dev_config_show_int(test_dev, buf, config->num_threads); } -static DEVICE_ATTR(config_num_threads, 0644, config_num_threads_show, - config_num_threads_store); +static DEVICE_ATTR_RW(config_num_threads); static ssize_t config_test_case_store(struct device *dev, struct device_attribute *attr, @@ -1037,8 +1034,7 @@ static ssize_t config_test_case_show(struct device *dev, return test_dev_config_show_uint(test_dev, buf, config->test_case); } -static DEVICE_ATTR(config_test_case, 0644, config_test_case_show, - config_test_case_store); +static DEVICE_ATTR_RW(config_test_case); static ssize_t test_result_show(struct device *dev, struct device_attribute *attr, @@ -1049,7 +1045,7 @@ static ssize_t test_result_show(struct device *dev, return 
test_dev_config_show_int(test_dev, buf, config->test_result); } -static DEVICE_ATTR(test_result, 0644, test_result_show, test_result_store); +static DEVICE_ATTR_RW(test_result); #define TEST_KMOD_DEV_ATTR(name) &dev_attr_##name.attr @@ -1145,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) mutex_lock(®_dev_mutex); /* int should suffice for number of devices, test for wrap */ - if (unlikely(num_test_devs + 1) < 0) { + if (num_test_devs + 1 == INT_MAX) { pr_err("reached limit of number of test devices\n"); goto out; } diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 8e83cbdc049c..f4000c137dbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -79,6 +79,21 @@ struct thread_data { struct test_obj *objs; }; +static u32 my_hashfn(const void *data, u32 len, u32 seed) +{ + const struct test_obj_rhl *obj = data; + + return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; +} + +static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct test_obj_rhl *test_obj = obj; + const struct test_obj_val *val = arg->key; + + return test_obj->value.id - val->id; +} + static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = { .nulls_base = (3U << RHT_BASE_SHIFT), }; +static struct rhashtable_params test_rht_params_dup = { + .head_offset = offsetof(struct test_obj_rhl, list_node), + .key_offset = offsetof(struct test_obj_rhl, value), + .key_len = sizeof(struct test_obj_val), + .hashfn = jhash, + .obj_hashfn = my_hashfn, + .obj_cmpfn = my_cmpfn, + .nelem_hint = 128, + .automatic_shrinking = false, +}; + static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); @@ -162,11 +188,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) return; } - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - pr_warn("Test failed: iterator failed: %d\n", err); - return; - } + rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { if (PTR_ERR(pos) == -EAGAIN) { @@ -469,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array, return err; } +static unsigned int __init print_ht(struct rhltable *rhlt) +{ + struct rhashtable *ht; + const struct bucket_table *tbl; + char buff[512] = ""; + unsigned int i, cnt = 0; + + ht = &rhlt->ht; + tbl = rht_dereference(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + struct test_obj_rhl *p; + + pos = rht_dereference(tbl->buckets[i], ht); + next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; + + if (!rht_is_a_nulls(pos)) { + sprintf(buff, "%s\nbucket[%d] -> ", buff, i); + } + + while (!rht_is_a_nulls(pos)) { + struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead); + sprintf(buff, "%s[[", buff); + do { + pos = &list->rhead; + list = rht_dereference(list->next, ht); + p = rht_obj(ht, pos); + + sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid, + list? ", " : " "); + cnt++; + } while (list); + + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + + sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? 
" -> " : ""); + } + } + printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + + return cnt; +} + +static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects, + int cnt, bool slow) +{ + struct rhltable rhlt; + unsigned int i, ret; + const char *key; + int err = 0; + + err = rhltable_init(&rhlt, &test_rht_params_dup); + if (WARN_ON(err)) + return err; + + for (i = 0; i < cnt; i++) { + rhl_test_objects[i].value.tid = i; + key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead); + key += test_rht_params_dup.key_offset; + + if (slow) { + err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key, + &rhl_test_objects[i].list_node.rhead)); + if (err == -EAGAIN) + err = 0; + } else + err = rhltable_insert(&rhlt, + &rhl_test_objects[i].list_node, + test_rht_params_dup); + if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast")) + goto skip_print; + } + + ret = print_ht(&rhlt); + WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast"); + +skip_print: + rhltable_destroy(&rhlt); + + return 0; +} + +static int __init test_insert_duplicates_run(void) +{ + struct test_obj_rhl rhl_test_objects[3] = {}; + + pr_info("test inserting duplicates\n"); + + /* two different values that map to same bucket */ + rhl_test_objects[0].value.id = 1; + rhl_test_objects[1].value.id = 21; + + /* and another duplicate with same as [0] value + * which will be second on the bucket list */ + rhl_test_objects[2].value.id = rhl_test_objects[0].value.id; + + test_insert_dup(rhl_test_objects, 2, false); + test_insert_dup(rhl_test_objects, 3, false); + test_insert_dup(rhl_test_objects, 2, true); + test_insert_dup(rhl_test_objects, 3, true); + + return 0; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -617,6 +745,8 @@ static int __init test_rht_init(void) do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); + test_insert_duplicates_run(); + if (!tcount) return 0; diff --git a/lib/test_sort.c b/lib/test_sort.c index d389c1cc2f6c..385c0ed5202f 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -39,5 +39,11 @@ exit: return err; } +static void __exit test_sort_exit(void) +{ +} + module_init(test_sort_init); +module_exit(test_sort_exit); + MODULE_LICENSE("GPL"); diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c new file mode 100644 index 000000000000..280f4979d00e --- /dev/null +++ b/lib/test_ubsan.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +typedef void(*test_ubsan_fp)(void); + +static void test_ubsan_add_overflow(void) +{ + volatile int val = INT_MAX; + + val += 2; +} + +static void test_ubsan_sub_overflow(void) +{ + volatile int val = INT_MIN; + volatile int val2 = 2; + + val -= val2; +} + +static void test_ubsan_mul_overflow(void) +{ + volatile int val = INT_MAX / 2; + + val *= 3; +} + +static void test_ubsan_negate_overflow(void) +{ + volatile int val = INT_MIN; + + val = -val; +} + +static void test_ubsan_divrem_overflow(void) +{ + volatile int val = 16; + volatile int val2 = 0; + + val /= val2; +} + +static void test_ubsan_vla_bound_not_positive(void) +{ + volatile int size = -1; + char buf[size]; + + (void)buf; +} + +static void test_ubsan_shift_out_of_bounds(void) +{ + volatile int val = -1; + int val2 = 10; + + val2 <<= val; +} + +static void test_ubsan_out_of_bounds(void) +{ + volatile int i = 4, j = 5; + volatile int arr[i]; + + arr[j] = i; +} + +static void 
test_ubsan_load_invalid_value(void) +{ + volatile char *dst, *src; + bool val, val2, *ptr; + char c = 4; + + dst = (char *)&val; + src = &c; + *dst = *src; + + ptr = &val2; + val2 = val; +} + +static void test_ubsan_null_ptr_deref(void) +{ + volatile int *ptr = NULL; + int val; + + val = *ptr; +} + +static void test_ubsan_misaligned_access(void) +{ + volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5}; + volatile int *ptr, val = 6; + + ptr = (int *)(arr + 1); + *ptr = val; +} + +static void test_ubsan_object_size_mismatch(void) +{ + /* "((aligned(8)))" helps this not into be misaligned for ptr-access. */ + volatile int val __aligned(8) = 4; + volatile long long *ptr, val2; + + ptr = (long long *)&val; + val2 = *ptr; +} + +static const test_ubsan_fp test_ubsan_array[] = { + test_ubsan_add_overflow, + test_ubsan_sub_overflow, + test_ubsan_mul_overflow, + test_ubsan_negate_overflow, + test_ubsan_divrem_overflow, + test_ubsan_vla_bound_not_positive, + test_ubsan_shift_out_of_bounds, + test_ubsan_out_of_bounds, + test_ubsan_load_invalid_value, + //test_ubsan_null_ptr_deref, /* exclude it because there is a crash */ + test_ubsan_misaligned_access, + test_ubsan_object_size_mismatch, +}; + +static int __init test_ubsan_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++) + test_ubsan_array[i](); + + (void)test_ubsan_null_ptr_deref; /* to avoid unsed-function warning */ + return 0; +} +module_init(test_ubsan_init); + +static void __exit test_ubsan_exit(void) +{ + /* do nothing */ +} +module_exit(test_ubsan_exit); + +MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c index 4621db801b23..e161f0498f42 100644 --- a/lib/test_user_copy.c +++ b/lib/test_user_copy.c @@ -31,11 +31,8 @@ * their capability at compile-time, we just have to opt-out certain archs. */ #if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \ - !defined(CONFIG_BLACKFIN) && \ - !defined(CONFIG_M32R) && \ !defined(CONFIG_M68K) && \ !defined(CONFIG_MICROBLAZE) && \ - !defined(CONFIG_MN10300) && \ !defined(CONFIG_NIOS2) && \ !defined(CONFIG_PPC32) && \ !defined(CONFIG_SUPERH)) diff --git a/lib/textsearch.c b/lib/textsearch.c index 0b79908dfe89..5939549c0e7b 100644 --- a/lib/textsearch.c +++ b/lib/textsearch.c @@ -10,7 +10,10 @@ * Pablo Neira Ayuso <pablo@netfilter.org> * * ========================================================================== - * + */ + +/** + * DOC: ts_intro * INTRODUCTION * * The textsearch infrastructure provides text searching facilities for @@ -19,7 +22,9 @@ * * ARCHITECTURE * - * User + * .. code-block:: none + * + * User * +----------------+ * | finish()|<--------------(6)-----------------+ * |get_next_block()|<--------------(5)---------------+ | @@ -33,21 +38,21 @@ * | (3)|----->| find()/next() |-----------+ | * | (7)|----->| destroy() |----------------------+ * +----------------+ +---------------+ - * - * (1) User configures a search by calling _prepare() specifying the - * search parameters such as the pattern and algorithm name. + * + * (1) User configures a search by calling textsearch_prepare() specifying + * the search parameters such as the pattern and algorithm name. * (2) Core requests the algorithm to allocate and initialize a search * configuration according to the specified parameters. - * (3) User starts the search(es) by calling _find() or _next() to - * fetch subsequent occurrences. 
A state variable is provided - * to the algorithm to store persistent variables. + * (3) User starts the search(es) by calling textsearch_find() or + * textsearch_next() to fetch subsequent occurrences. A state variable + * is provided to the algorithm to store persistent variables. * (4) Core eventually resets the search offset and forwards the find() * request to the algorithm. * (5) Algorithm calls get_next_block() provided by the user continuously * to fetch the data to be searched in block by block. * (6) Algorithm invokes finish() after the last call to get_next_block * to clean up any leftovers from get_next_block. (Optional) - * (7) User destroys the configuration by calling _destroy(). + * (7) User destroys the configuration by calling textsearch_destroy(). * (8) Core notifies the algorithm to destroy algorithm specific * allocations. (Optional) * @@ -62,9 +67,10 @@ * amount of times and even in parallel as long as a separate struct * ts_state variable is provided to every instance. * - * The actual search is performed by either calling textsearch_find_- - * continuous() for linear data or by providing an own get_next_block() - * implementation and calling textsearch_find(). Both functions return + * The actual search is performed by either calling + * textsearch_find_continuous() for linear data or by providing + * an own get_next_block() implementation and + * calling textsearch_find(). Both functions return * the position of the first occurrence of the pattern or UINT_MAX if * no match was found. Subsequent occurrences can be found by calling * textsearch_next() regardless of the linearity of the data. @@ -72,7 +78,7 @@ * Once you're done using a configuration it must be given back via * textsearch_destroy. * - * EXAMPLE + * EXAMPLE:: * * int pos; * struct ts_config *conf; @@ -87,13 +93,13 @@ * goto errout; * } * - * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); + * pos = textsearch_find_continuous(conf, \&state, example, strlen(example)); * if (pos != UINT_MAX) - * panic("Oh my god, dancing chickens at %d\n", pos); + * panic("Oh my god, dancing chickens at \%d\n", pos); * * textsearch_destroy(conf); - * ========================================================================== */ +/* ========================================================================== */ #include <linux/module.h> #include <linux/types.h> @@ -225,7 +231,7 @@ static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, * * Returns the position of first occurrence of the pattern or * %UINT_MAX if no occurrence was found. 
- */ + */ unsigned int textsearch_find_continuous(struct ts_config *conf, struct ts_state *state, const void *data, unsigned int len) diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..59fee96c29a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -141,11 +141,6 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static bool location_is_valid(struct source_location *loc) -{ - return loc->file_name != NULL; -} - static DEFINE_SPINLOCK(report_lock); static void ubsan_prologue(struct source_location *location, @@ -265,14 +260,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +276,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +294,15 @@ static void handle_missaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,37 +310,46 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } -EXPORT_SYMBOL(__ubsan_handle_type_mismatch); -void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) { - unsigned long flags; - - if (suppress_report(&data->location)) - return; - - ubsan_prologue(&data->location, &flags); + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch); - pr_err("null pointer returned from function declared to never return null\n"); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + 
unsigned long ptr) +{ - if (location_is_valid(&data->attr_location)) - print_source_location("returns_nonnull attribute specified in", - &data->attr_location); + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; - ubsan_epilogue(&flags); + ubsan_type_mismatch_common(&common_data, ptr); } -EXPORT_SYMBOL(__ubsan_handle_nonnull_return); +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, unsigned long bound) diff --git a/lib/ubsan.h b/lib/ubsan.h index 88f23557edbe..f4d8d0bd4016 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -37,15 +37,24 @@ struct type_mismatch_data { unsigned char type_check_kind; }; -struct nonnull_arg_data { +struct type_mismatch_data_v1 { struct source_location location; - struct source_location attr_location; - int arg_index; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; }; -struct nonnull_return_data { +struct nonnull_arg_data { struct source_location location; struct source_location attr_location; + int arg_index; }; struct vla_bound_data { diff --git a/lib/usercopy.c b/lib/usercopy.c index 15e2e6fb060e..3744b2a8e591 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL(_copy_from_user); #endif #ifndef INLINE_COPY_TO_USER -unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n) +unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (likely(access_ok(VERIFY_WRITE, to, n))) { diff --git a/lib/uuid.c b/lib/uuid.c index 680b9fb9ba09..2290b9f001a9 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -29,15 +29,14 @@ EXPORT_SYMBOL(uuid_null); const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; -/*************************************************************** +/** + * generate_random_uuid - generate a random UUID + * @uuid: where to put the generated UUID + * * Random UUID interface * - * Used here for a Boot ID, but can be useful for other kernel - * drivers. - ***************************************************************/ - -/* - * Generate random UUID + * Used to create a Boot ID or a filesystem UUID/GUID, but can be + * useful for other kernel drivers. */ void generate_random_uuid(unsigned char uuid[16]) { @@ -73,16 +72,17 @@ void uuid_gen(uuid_t *bu) EXPORT_SYMBOL_GPL(uuid_gen); /** - * uuid_is_valid - checks if UUID string valid - * @uuid: UUID string to check - * - * Description: - * It checks if the UUID string is following the format: - * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - * where x is a hex digit. - * - * Return: true if input is valid UUID string. - */ + * uuid_is_valid - checks if a UUID string is valid + * @uuid: UUID string to check + * + * Description: + * It checks if the UUID string is following the format: + * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + * + * where x is a hex digit. + * + * Return: true if input is valid UUID string. 
+ */ bool uuid_is_valid(const char *uuid) { unsigned int i; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01c3957b2de6..30c0cb8cc9bc 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -42,7 +42,6 @@ #include "../mm/internal.h" /* For the trace_print_flags arrays */ #include <asm/page.h> /* for PAGE_SIZE */ -#include <asm/sections.h> /* for dereference_function_descriptor() */ #include <asm/byteorder.h> /* cpu_to_le16 */ #include <linux/string_helpers.h> @@ -337,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n) * * If speed is not important, use snprintf(). It's easy to read the code. */ -int num_to_str(char *buf, int size, unsigned long long num) +int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) { /* put_dec requires 2-byte alignment of the buffer. */ char tmp[sizeof(num) * 3] __aligned(2); @@ -351,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num) len = put_dec(tmp, num) - tmp; } - if (len > size) + if (len > size || width > size) return 0; + + if (width > len) { + width = width - len; + for (idx = 0; idx < width; idx++) + buf[idx] = ' '; + } else { + width = 0; + } + for (idx = 0; idx < len; ++idx) - buf[idx] = tmp[len - idx - 1]; - return len; + buf[idx + width] = tmp[len - idx - 1]; + + return len + width; } #define SIGN 1 /* unsigned/signed, must be 1 */ @@ -1834,7 +1843,8 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) * * - 'x' For printing the address. Equivalent to "%lx". * - * ** Please update also Documentation/printk-formats.txt when making changes ** + * ** When making changes please also update: + * Documentation/core-api/printk-formats.rst * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1849,7 +1859,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, { const int default_width = 2 * sizeof(void *); - if (!ptr && *fmt != 'K') { + if (!ptr && *fmt != 'K' && *fmt != 'x') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. @@ -1862,10 +1872,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, switch (*fmt) { case 'F': case 'f': - ptr = dereference_function_descriptor(ptr); - /* Fallthrough */ case 'S': case 's': + ptr = dereference_symbol_descriptor(ptr); + /* Fallthrough */ case 'B': return symbol_string(buf, end, ptr, spec, fmt); case 'R': @@ -2194,7 +2204,7 @@ set_precision(struct printf_spec *spec, int prec) * - ``%n`` is unsupported * - ``%p*`` is handled by pointer() * - * See pointer() or Documentation/printk-formats.txt for more + * See pointer() or Documentation/core-api/printk-formats.rst for more * extensive description. 
* * **Please update the documentation in both places when making changes** @@ -2516,29 +2526,34 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) { struct printf_spec spec = {0}; char *str, *end; + int width; str = (char *)bin_buf; end = (char *)(bin_buf + size); #define save_arg(type) \ -do { \ +({ \ + unsigned long long value; \ if (sizeof(type) == 8) { \ - unsigned long long value; \ + unsigned long long val8; \ str = PTR_ALIGN(str, sizeof(u32)); \ - value = va_arg(args, unsigned long long); \ + val8 = va_arg(args, unsigned long long); \ if (str + sizeof(type) <= end) { \ - *(u32 *)str = *(u32 *)&value; \ - *(u32 *)(str + 4) = *((u32 *)&value + 1); \ + *(u32 *)str = *(u32 *)&val8; \ + *(u32 *)(str + 4) = *((u32 *)&val8 + 1); \ } \ + value = val8; \ } else { \ - unsigned long value; \ + unsigned int val4; \ str = PTR_ALIGN(str, sizeof(type)); \ - value = va_arg(args, int); \ + val4 = va_arg(args, int); \ if (str + sizeof(type) <= end) \ - *(typeof(type) *)str = (type)value; \ + *(typeof(type) *)str = (type)(long)val4; \ + value = (unsigned long long)val4; \ } \ str += sizeof(type); \ -} while (0) + value; \ +}) while (*fmt) { int read = format_decode(fmt, &spec); @@ -2554,7 +2569,10 @@ do { \ case FORMAT_TYPE_WIDTH: case FORMAT_TYPE_PRECISION: - save_arg(int); + width = (int)save_arg(int); + /* Pointers may require the width */ + if (*fmt == 'p') + set_field_width(&spec, width); break; case FORMAT_TYPE_CHAR: @@ -2576,7 +2594,29 @@ do { \ } case FORMAT_TYPE_PTR: - save_arg(void *); + /* Dereferenced pointers must be done now */ + switch (*fmt) { + /* Dereference of functions is still OK */ + case 'S': + case 's': + case 'F': + case 'f': + case 'x': + case 'K': + save_arg(void *); + break; + default: + if (!isalnum(*fmt)) { + save_arg(void *); + break; + } + str = pointer(fmt, str, end, va_arg(args, void *), + spec); + if (str + 1 < end) + *str++ = '\0'; + else + end[-1] = '\0'; /* Must be nul terminated */ + } /* skip all alphanumeric pointer suffixes */ while (isalnum(*fmt)) fmt++; @@ -2728,11 +2768,41 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) break; } - case FORMAT_TYPE_PTR: - str = pointer(fmt, str, end, get_arg(void *), spec); + case FORMAT_TYPE_PTR: { + bool process = false; + int copy, len; + /* Non function dereferences were already done */ + switch (*fmt) { + case 'S': + case 's': + case 'F': + case 'f': + case 'x': + case 'K': + process = true; + break; + default: + if (!isalnum(*fmt)) { + process = true; + break; + } + /* Pointer dereference was already processed */ + if (str < end) { + len = copy = strlen(args); + if (copy > end - str) + copy = end - str; + memcpy(str, args, copy); + str += len; + args += len; + } + } + if (process) + str = pointer(fmt, str, end, get_arg(void *), spec); + while (isalnum(*fmt)) fmt++; break; + } case FORMAT_TYPE_PERCENT_CHAR: if (str < end) diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index dd0a359c135b..7920cbbfeae9 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -3,16 +3,7 @@ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o ccflags-y += -O3 -# Object files unique to zstd_compress and zstd_decompress -zstd_compress-y := fse_compress.o huf_compress.o compress.o -zstd_decompress-y := huf_decompress.o decompress.o - -# These object files are shared between the modules. -# Always add them to zstd_compress. -# Unless both zstd_compress and zstd_decompress are built in -# then also add them to zstd_decompress. 
-zstd_compress-y += entropy_common.o fse_decompress.o zstd_common.o - -ifneq ($(CONFIG_ZSTD_COMPRESS)$(CONFIG_ZSTD_DECOMPRESS),yy) - zstd_decompress-y += entropy_common.o fse_decompress.o zstd_common.o -endif +zstd_compress-y := fse_compress.o huf_compress.o compress.o \ + entropy_common.o fse_decompress.o zstd_common.o +zstd_decompress-y := huf_decompress.o decompress.o \ + entropy_common.o fse_decompress.o zstd_common.o