523 files changed, 31676 insertions, 13621 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-irq b/Documentation/ABI/testing/sysfs-kernel-irq
index eb074b100986..8910d0c4bcd8 100644
--- a/Documentation/ABI/testing/sysfs-kernel-irq
+++ b/Documentation/ABI/testing/sysfs-kernel-irq
@@ -51,3 +51,10 @@ Date:		September 2016
 KernelVersion:	4.9
 Contact:	Craig Gallek <kraig@google.com>
 Description:	The type of the interrupt.  Either the string 'level' or 'edge'.
+
+What:		/sys/kernel/irq/<irq>/wakeup
+Date:		March 2018
+KernelVersion:	4.17
+Contact:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Description:	The wakeup state of the interrupt. Either the string
+		'enabled' or 'disabled'.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bf03fe49f1ce..e6d33fa8fed4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1739,6 +1739,14 @@
 			of a GICv2 controller even if the memory range
 			exposed by the device tree is too small.
 
+	irqchip.gicv3_nolpi=
+			[ARM, ARM64]
+			Force the kernel to ignore the availability of
+			LPIs (and by consequence ITSs). Intended for system
+			that use the kernel as a bootloader, and thus want
+			to let secondary kernels in charge of setting up
+			LPIs.
+
 	irqfixup	[HW]
 			When an interrupt is not handled search all handlers
 			for it. Intended to get systems with badly broken
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index a70090b28b07..7964f03846b1 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,7 +110,7 @@ infrastructure:
      x--------------------------------------------------x
      | Name                         |  bits   | visible |
      |--------------------------------------------------|
-     | RES0                         | [63-52] |    n    |
+     | TS                           | [55-52] |    y    |
      |--------------------------------------------------|
      | FHM                          | [51-48] |    y    |
      |--------------------------------------------------|
@@ -124,8 +124,6 @@ infrastructure:
      |--------------------------------------------------|
      | RDM                          | [31-28] |    y    |
      |--------------------------------------------------|
-     | RES0                         | [27-24] |    n    |
-     |--------------------------------------------------|
      | ATOMICS                      | [23-20] |    y    |
      |--------------------------------------------------|
      | CRC32                        | [19-16] |    y    |
@@ -135,8 +133,6 @@ infrastructure:
      | SHA1                         | [11-8]  |    y    |
      |--------------------------------------------------|
      | AES                          | [7-4]   |    y    |
-     |--------------------------------------------------|
-     | RES0                         | [3-0]   |    n    |
      x--------------------------------------------------x
 
 
@@ -144,12 +140,10 @@ infrastructure:
      x--------------------------------------------------x
      | Name                         |  bits   | visible |
      |--------------------------------------------------|
-     | RES0                         | [63-36] |    n    |
+     | DIT                          | [51-48] |    y    |
      |--------------------------------------------------|
      | SVE                          | [35-32] |    y    |
      |--------------------------------------------------|
-     | RES0                         | [31-28] |    n    |
-     |--------------------------------------------------|
      | GIC                          | [27-24] |    n    |
      |--------------------------------------------------|
      | AdvSIMD                      | [23-20] |    y    |
@@ -199,6 +193,14 @@ infrastructure:
      | DPB                          | [3-0]   |    y    |
      x--------------------------------------------------x
 
+  5) ID_AA64MMFR2_EL1 - Memory model feature register 2
+
+     x--------------------------------------------------x
+     | Name                         |  bits   | visible |
+     |--------------------------------------------------|
+     | AT                           | [35-32] |    y    |
+     x--------------------------------------------------x
+
 Appendix I: Example
 ---------------------------
 
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index 57324ee55ecc..d6aff2c5e9e2 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -162,3 +162,19 @@ HWCAP_SVE
 HWCAP_ASIMDFHM
 
    Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
+
+HWCAP_DIT
+
+    Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001.
+
+HWCAP_USCAT
+
+    Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001.
+
+HWCAP_ILRCPC
+
+    Functionality implied by ID_AA64ISR1_EL1.LRCPC == 0b0002.
+
+HWCAP_FLAGM
+
+    Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index c1d520de6dfe..3b2f2dd82225 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -55,6 +55,7 @@ stable kernels.
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220        |
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
+| ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
 | ARM            | MMU-500         | #841119,#826419 | N/A                         |
 |                |                 |                 |                             |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375        |
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
new file mode 100644
index 000000000000..8272ac92a14f
--- /dev/null
+++ b/Documentation/crypto/crypto_engine.rst
@@ -0,0 +1,48 @@
+=============
+CRYPTO ENGINE
+=============
+
+Overview
+--------
+The crypto engine API (CE), is a crypto queue manager.
+
+Requirement
+-----------
+You have to put at start of your tfm_ctx the struct crypto_engine_ctx
+struct your_tfm_ctx {
+        struct crypto_engine_ctx enginectx;
+        ...
+};
+Why: Since CE manage only crypto_async_request, it cannot know the underlying
+request_type and so have access only on the TFM.
+So using container_of for accessing __ctx is impossible.
+Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
+so it must assume that crypto_engine_ctx is at start of it.
+
+Order of operations
+-------------------
+You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
+And start it via crypto_engine_start().
+
+Before transferring any request, you have to fill the enginectx.
+- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
+- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
+- do_one_request: (taking a function pointer) Do encryption for current request
+
+Note: that those three functions get the crypto_async_request associated with the received request.
+So your need to get the original request via container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receive a crypto_request, you have to transfer it to
+the cryptoengine via one of:
+- crypto_transfer_ablkcipher_request_to_engine()
+- crypto_transfer_aead_request_to_engine()
+- crypto_transfer_akcipher_request_to_engine()
+- crypto_transfer_hash_request_to_engine()
+- crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following function is needed:
+- crypto_finalize_ablkcipher_request
+- crypto_finalize_aead_request
+- crypto_finalize_akcipher_request
+- crypto_finalize_hash_request
+- crypto_finalize_skcipher_request
diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst
index 66f50d32dcec..c45c6f400dbd 100644
--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@@ -236,6 +236,14 @@ when used from another part of the kernel.
                                |
                                '---------------> HASH2
 
+Note that it is perfectly legal to "abandon" a request object:
+- call .init() and then (as many times) .update()
+- _not_ call any of .final(), .finup() or .export() at any point in future
+
+In other words implementations should mind the resource allocation and clean-up.
+No resources related to request objects should remain allocated after a call
+to .init() or .update(), since there might be no chance to free them.
+
 
 Specifics Of Asynchronous HASH Transformation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
index cec8d5d74e26..c2598ab27f2e 100644
--- a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
+++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
@@ -1,7 +1,8 @@
 Arm TrustZone CryptoCell cryptographic engine
 
 Required properties:
-- compatible: Should be "arm,cryptocell-712-ree".
+- compatible: Should be one of: "arm,cryptocell-712-ree",
+  "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree".
 - reg: Base physical address of the engine and length of memory mapped region.
 - interrupts: Interrupt number for the device.
 
diff --git a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
index 30c3ce6b502e..5dba55cdfa63 100644
--- a/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
+++ b/Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
@@ -8,7 +8,11 @@ Required properties:
 - interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
 
 Optional properties:
-- clocks: Reference to the crypto engine clock.
+- clocks: Reference to the crypto engine clocks, the second clock is
+          needed for the Armada 7K/8K SoCs.
+- clock-names: mandatory if there is a second clock, in this case the
+               name must be "core" for the first clock and "reg" for
+               the second one.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.txt b/Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.txt
new file mode 100644
index 000000000000..b47a8a02b17b
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/mscc,ocelot-icpu-intr.txt
@@ -0,0 +1,22 @@
+Microsemi Ocelot SoC ICPU Interrupt Controller
+
+Required properties:
+
+- compatible : should be "mscc,ocelot-icpu-intr"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 1.
+- interrupt-parent : phandle of the CPU interrupt controller.
+- interrupts : Specifies the CPU interrupt the controller is connected to.
+
+Example:
+
+		intc: interrupt-controller@70000070 {
+			compatible = "mscc,ocelot-icpu-intr";
+			reg = <0x70000070 0x70>;
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+		};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt
new file mode 100644
index 000000000000..0b2c97ddb520
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/qcom,pdc.txt
@@ -0,0 +1,78 @@
+PDC interrupt controller
+
+Qualcomm Technologies Inc. SoCs based on the RPM Hardened architecture have a
+Power Domain Controller (PDC) that is on always-on domain. In addition to
+providing power control for the power domains, the hardware also has an
+interrupt controller that can be used to help detect edge low interrupts as
+well detect interrupts when the GIC is non-operational.
+
+GIC is parent interrupt controller at the highest level. Platform interrupt
+controller PDC is next in hierarchy, followed by others. Drivers requiring
+wakeup capabilities of their device interrupts routed through the PDC, must
+specify PDC as their interrupt controller and request the PDC port associated
+with the GIC interrupt. See example below.
+
+Properties:
+
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: Should contain "qcom,<soc>-pdc"
+		    - "qcom,sdm845-pdc": For SDM845
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: Specifies the base physical address for PDC hardware.
+
+- interrupt-cells:
+	Usage: required
+	Value type: <u32>
+	Definition: Specifies the number of cells needed to encode an interrupt
+		    source.
+		    Must be 2.
+		    The first element of the tuple is the PDC pin for the
+		    interrupt.
+		    The second element is the trigger type.
+
+- interrupt-parent:
+	Usage: required
+	Value type: <phandle>
+	Definition: Specifies the interrupt parent necessary for hierarchical
+		    domain to operate.
+
+- interrupt-controller:
+	Usage: required
+	Value type: <bool>
+	Definition: Identifies the node as an interrupt controller.
+
+- qcom,pdc-ranges:
+	Usage: required
+	Value type: <u32 array>
+	Definition: Specifies the PDC pin offset and the number of PDC ports.
+		    The tuples indicates the valid mapping of valid PDC ports
+		    and their hwirq mapping.
+		    The first element of the tuple is the starting PDC port.
+		    The second element is the GIC hwirq number for the PDC port.
+		    The third element is the number of interrupts in sequence.
+
+Example:
+
+	pdc: interrupt-controller@b220000 {
+		compatible = "qcom,sdm845-pdc";
+		reg = <0xb220000 0x30000>;
+		qcom,pdc-ranges = <0 512 94>, <94 641 15>, <115 662 7>;
+		#interrupt-cells = <2>;
+		interrupt-parent = <&intc>;
+		interrupt-controller;
+	};
+
+DT binding of a device that wants to use the GIC SPI 514 as a wakeup
+interrupt, must do -
+
+	wake-device {
+		interrupts-extended = <&pdc 2 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+In this case interrupt 514 would be mapped to port 2 on the PDC as defined by
+the qcom,pdc-ranges property.
diff --git a/Documentation/devicetree/bindings/rng/imx-rngc.txt b/Documentation/devicetree/bindings/rng/imx-rng.txt
index 93c7174a7bed..405c2b00ccb0 100644
--- a/Documentation/devicetree/bindings/rng/imx-rngc.txt
+++ b/Documentation/devicetree/bindings/rng/imx-rng.txt
@@ -1,15 +1,14 @@
-Freescale RNGC (Random Number Generator Version C)
-
-The driver also supports version B, which is mostly compatible
-to version C.
+Freescale RNGA/RNGB/RNGC (Random Number Generator Versions A, B and C)
 
 Required properties:
 - compatible : should be one of
+               "fsl,imx21-rnga"
+               "fsl,imx31-rnga" (backward compatible with "fsl,imx21-rnga")
                "fsl,imx25-rngb"
                "fsl,imx35-rngc"
 - reg : offset and length of the register set of this block
-- interrupts : the interrupt number for the RNGC block
-- clocks : the RNGC clk source
+- interrupts : the interrupt number for the RNG block
+- clocks : the RNG clk source
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/rng/ks-sa-rng.txt b/Documentation/devicetree/bindings/rng/ks-sa-rng.txt
new file mode 100644
index 000000000000..b7a65b487901
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/ks-sa-rng.txt
@@ -0,0 +1,21 @@
+Keystone SoC Hardware Random Number Generator(HWRNG) Module
+
+On Keystone SoCs HWRNG module is a submodule of the Security Accelerator.
+
+- compatible: should be "ti,keystone-rng"
+- ti,syscon-sa-cfg: phandle to syscon node of the SA configuration registers.
+		    This registers are shared between hwrng and crypto drivers.
+- clocks: phandle to the reference clocks for the subsystem
+- clock-names: functional clock name. Should be set to "fck"
+- reg: HWRNG module register space
+
+Example:
+/* K2HK */
+
+rng@24000 {
+	compatible = "ti,keystone-rng";
+	ti,syscon-sa-cfg = <&sa_config>;
+	clocks = <&clksa>;
+	clock-names = "fck";
+	reg = <0x24000 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt
index 9cf7876ab434..ea434ce50f36 100644
--- a/Documentation/devicetree/bindings/rng/omap_rng.txt
+++ b/Documentation/devicetree/bindings/rng/omap_rng.txt
@@ -13,7 +13,12 @@ Required properties:
 - interrupts : the interrupt number for the RNG module.
 		Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
 - clocks: the trng clock source. Only mandatory for the
-  "inside-secure,safexcel-eip76" compatible.
+  "inside-secure,safexcel-eip76" compatible, the second clock is
+  needed for the Armada 7K/8K SoCs
+- clock-names: mandatory if there is a second clock, in this case the
+  name must be "core" for the first clock and "reg" for the second
+  one
+
 
 Example:
 /* AM335x */
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.txt b/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
index 47f04176f93b..1dfa7d51e006 100644
--- a/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
+++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.txt
@@ -11,6 +11,10 @@ Required properties:
 - interrupts : The designated IRQ line for the RNG
 - clocks : The clock needed to enable the RNG
 
+Optional properties:
+- resets : The reset to properly start RNG
+- clock-error-detect : Enable the clock detection management
+
 Example:
 
 	rng: rng@50060800 {
diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README
index a9da51553ba3..99ce3d25003d 100644
--- a/Documentation/filesystems/cifs/README
+++ b/Documentation/filesystems/cifs/README
@@ -11,13 +11,14 @@ Information Foundation.  CIFS and now SMB3 has now become a defacto
 standard for interoperating between Macs and Windows and major NAS appliances.
 
 Please see
+  MS-SMB2 (for detailed SMB2/SMB3/SMB3.1.1 protocol specification)
   http://protocolfreedom.org/ and
   http://samba.org/samba/PFIF/
 for more details.
 
 
 For questions or bug reports please contact:
-    sfrench@samba.org (sfrench@us.ibm.com) 
+    smfrench@gmail.com
 
 See the project page at: https://wiki.samba.org/index.php/LinuxCIFS_utils
 
@@ -37,15 +38,15 @@ Installation instructions:
 =========================
 If you have built the CIFS vfs as module (successfully) simply
 type "make modules_install" (or if you prefer, manually copy the file to
-the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o).
+the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.ko).
 
 If you have built the CIFS vfs into the kernel itself, follow the instructions
 for your distribution on how to install a new kernel (usually you
 would simply type "make install").
 
-If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on 
-the CIFS VFS web site) copy it to the same directory in which mount.smbfs and 
-similar files reside (usually /sbin).  Although the helper software is not  
+If you do not have the utility mount.cifs (in the Samba 4.x source tree and on
+the CIFS VFS web site) copy it to the same directory in which mount helpers
+reside (usually /sbin).  Although the helper software is not
 required, mount.cifs is recommended.  Most distros include a "cifs-utils"
 package that includes this utility so it is recommended to install this.
 
@@ -118,10 +119,13 @@ this can become unwieldy when potential mount targets include many
 or  unpredictable UNC names.
 
 Samba Considerations 
-==================== 
-To get the maximum benefit from the CIFS VFS, we recommend using a server that 
-supports the SNIA CIFS Unix Extensions standard (e.g.  Samba 2.2.5 or later or 
-Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers.  
+====================
+Most current servers support SMB2.1 and SMB3 which are more secure,
+but there are useful protocol extensions for the older less secure CIFS
+dialect, so to get the maximum benefit if mounting using the older dialect
+(CIFS/SMB1), we recommend using a server that supports the SNIA CIFS
+Unix Extensions standard (e.g. almost any  version of Samba ie version
+2.2.5 or later) but the CIFS vfs works fine with a wide variety of CIFS servers.
 Note that uid, gid and file permissions will display default values if you do 
 not have a server that supports the Unix extensions for CIFS (such as Samba 
 2.2.5 or later).  To enable the Unix CIFS Extensions in the Samba server, add 
@@ -603,11 +607,6 @@ Stats			Lists summary resource usage information as well as per
 			in the kernel configuration.
 
 Configuration pseudo-files:
-PacketSigningEnabled	If set to one, cifs packet signing is enabled
-			and will be used if the server requires 
-			it.  If set to two, cifs packet signing is
-			required even if the server considers packet
-			signing optional. (default 1)
 SecurityFlags		Flags which control security negotiation and
 			also packet signing. Authentication (may/must)
 			flags (e.g. for NTLM and/or NTLMv2) may be combined with
@@ -666,8 +665,6 @@ traceSMB		If set to one, debug information is logged to the
 LookupCacheEnable	If set to one, inode information is kept cached
 			for one second improving performance of lookups
 			(default 1)
-OplockEnabled		If set to one, safe distributed caching enabled.
-			(default 1)
 LinuxExtensionsEnabled	If set to one then the client will attempt to
 			use the CIFS "UNIX" extensions which are optional
 			protocol enhancements that allow CIFS servers
diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO
index 396ecfd6ff4a..c5adf149b57f 100644
--- a/Documentation/filesystems/cifs/TODO
+++ b/Documentation/filesystems/cifs/TODO
@@ -1,4 +1,4 @@
-Version 2.04 September 13, 2017
+Version 2.11 September 13, 2017
 
 A Partial List of Missing Features
 ==================================
@@ -8,10 +8,10 @@ for visible, important contributions to this module.  Here
 is a partial list of the known problems and missing features:
 
 a) SMB3 (and SMB3.02) missing optional features:
-   - RDMA (started)
-   - multichannel (started)
+   - multichannel (started), integration with RDMA
    - directory leases (improved metadata caching)
-   - T10 copy offload (copy chunk is only mechanism supported)
+   - T10 copy offload (copy chunk, and "Duplicate Extents" ioctl
+     currently the only two server side copy mechanisms supported)
 
 b) improved sparse file support
 
@@ -21,9 +21,8 @@ using Directory Leases
 d) quota support (needs minor kernel change since quota calls
 to make it to network filesystems or deviceless filesystems)
 
-e) Better optimize open to reduce redundant opens (using reference
-counts more) and to improve use of compounding in SMB3 to reduce
-number of roundtrips.
+e) Compounding (in progress) to reduce number of roundtrips, and also
+better optimize open to reduce redundant opens (using reference counts more).
 
 f) Finish inotify support so kde and gnome file list windows
 will autorefresh (partially complete by Asser). Needs minor kernel
@@ -35,7 +34,8 @@ the CIFS statistics (started)
 h) implement support for security and trusted categories of xattrs
 (requires minor protocol extension) to enable better support for SELINUX
 
-i) Implement O_DIRECT flag on open (already supported on mount)
+i) Add support for tree connect contexts (see MS-SMB2) a new SMB3.1.1 protocol
+   feature (may be especially useful for virtualization).
 
 j) Create UID mapping facility so server UIDs can be mapped on a per
 mount or a per server basis to client UIDs or nobody if no mapping
@@ -53,13 +53,16 @@ viewing them.
 
 o) mount helper GUI (to simplify the various configuration options on mount)
 
-p) autonegotiation of dialects (offering more than one dialect ie SMB3.02,
-SMB3, SMB2.1 not just SMB3).
+p) Add support for witness protocol (perhaps ioctl to cifs.ko from user space
+   tool listening on witness protocol RPC) to allow for notification of share
+   move, server failover, and server adapter changes.  And also improve other
+   failover scenarios, e.g. when client knows multiple DFS entries point to
+   different servers, and the server we are connected to has gone down.
 
 q) Allow mount.cifs to be more verbose in reporting errors with dialect
 or unsupported feature errors.
 
-r) updating cifs documentation, and user guid.
+r) updating cifs documentation, and user guide.
 
 s) Addressing bugs found by running a broader set of xfstests in standard
 file system xfstest suite.
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 0bc33ad4a3f9..fdf5fb54a04c 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -461,21 +461,13 @@ of ftrace. Here is a list of some of the key files:
 		and ticks at the same rate as the hardware clocksource.
 
 	boot:
-		This is the boot clock (CLOCK_BOOTTIME) and is based on the
-		fast monotonic clock, but also accounts for time spent in
-		suspend. Since the clock access is designed for use in
-		tracing in the suspend path, some side effects are possible
-		if clock is accessed after the suspend time is accounted before
-		the fast mono clock is updated. In this case, the clock update
-		appears to happen slightly sooner than it normally would have.
-		Also on 32-bit systems, it's possible that the 64-bit boot offset
-		sees a partial update. These effects are rare and post
-		processing should be able to handle them. See comments in the
-		ktime_get_boot_fast_ns() function for more information.
-
-		To set a clock, simply echo the clock name into this file::
-
-		  echo global > trace_clock
+		Same as mono. Used to be a separate clock which accounted
+		for the time spent in suspend while CLOCK_MONOTONIC did
+		not.
+
+	To set a clock, simply echo the clock name into this file::
+
+	  # echo global > trace_clock
 
   trace_marker:
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 2328eed6aea9..9d42bb8bb120 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3252,12 +3252,11 @@ F:	drivers/net/ieee802154/cc2520.c
 F:	include/linux/spi/cc2520.h
 F:	Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 
-CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER
 M:	Gilad Ben-Yossef <gilad@benyossef.com>
 L:	linux-crypto@vger.kernel.org
-L:	driverdev-devel@linuxdriverproject.org
 S:	Supported
-F:	drivers/staging/ccree/
+F:	drivers/crypto/ccree/
 W:	https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
 
 CEC FRAMEWORK
@@ -6962,7 +6961,7 @@ F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
 
 INSIDE SECURE CRYPTO DRIVER
-M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+M:	Antoine Tenart <antoine.tenart@bootlin.com>
 F:	drivers/crypto/inside-secure/
 S:	Maintained
 L:	linux-crypto@vger.kernel.org
@@ -7200,6 +7199,14 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/i40iw/
 
+INTEL SHA MULTIBUFFER DRIVER
+M:	Megha Dey <megha.dey@linux.intel.com>
+R:	Tim Chen <tim.c.chen@linux.intel.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	arch/x86/crypto/sha*-mb
+F:	crypto/mcryptd.c
+
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b8e69fe282b8..925d1364727a 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -121,4 +121,10 @@ config CRYPTO_CHACHA20_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 30ef8e291271..3304e671918d 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -53,7 +54,9 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
+ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
@@ -62,5 +65,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 54b384084637..184d6c2d15d5 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -174,6 +174,16 @@
 	.ltorg
 	.endm
 
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
+ENDPROC(__aes_arm_decrypt)
+
+	.section	".rodata", "a"
 	.align		L1_CACHE_SHIFT
 	.type		__aes_arm_inverse_sbox, %object
 __aes_arm_inverse_sbox:
@@ -210,12 +220,3 @@ __aes_arm_inverse_sbox:
 	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
-
-ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm_encrypt)
-
-	.align		5
-ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
-ENDPROC(__aes_arm_decrypt)
diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
new file mode 100644
index 000000000000..3c1e203e53b9
--- /dev/null
+++ b/arch/arm/crypto/speck-neon-core.S
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+
+	// arguments
+	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
+	NROUNDS		.req	r1	// int nrounds
+	DST		.req	r2	// void *dst
+	SRC		.req	r3	// const void *src
+	NBYTES		.req	r4	// unsigned int nbytes
+	TWEAK		.req	r5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	X0		.req	q0
+	X0_L		.req	d0
+	X0_H		.req	d1
+	Y0		.req	q1
+	Y0_H		.req	d3
+	X1		.req	q2
+	X1_L		.req	d4
+	X1_H		.req	d5
+	Y1		.req	q3
+	Y1_H		.req	d7
+	X2		.req	q4
+	X2_L		.req	d8
+	X2_H		.req	d9
+	Y2		.req	q5
+	Y2_H		.req	d11
+	X3		.req	q6
+	X3_L		.req	d12
+	X3_H		.req	d13
+	Y3		.req	q7
+	Y3_H		.req	d15
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	q8
+	ROUND_KEY_L	.req	d16
+	ROUND_KEY_H	.req	d17
+
+	// index vector for vtbl-based 8-bit rotates
+	ROTATE_TABLE	.req	d18
+
+	// multiplication table for updating XTS tweaks
+	GF128MUL_TABLE	.req	d19
+	GF64MUL_TABLE	.req	d19
+
+	// current XTS tweak value(s)
+	TWEAKV		.req	q10
+	TWEAKV_L	.req	d20
+	TWEAKV_H	.req	d21
+
+	TMP0		.req	q12
+	TMP0_L		.req	d24
+	TMP0_H		.req	d25
+	TMP1		.req	q13
+	TMP2		.req	q14
+	TMP3		.req	q15
+
+	.align		4
+.Lror64_8_table:
+	.byte		1, 2, 3, 4, 5, 6, 7, 0
+.Lror32_8_table:
+	.byte		1, 2, 3, 0, 5, 6, 7, 4
+.Lrol64_8_table:
+	.byte		7, 0, 1, 2, 3, 4, 5, 6
+.Lrol32_8_table:
+	.byte		3, 0, 1, 2, 7, 4, 5, 6
+.Lgf128mul_table:
+	.byte		0, 0x87
+	.fill		14
+.Lgf64mul_table:
+	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
+	.fill		12
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ *
+ * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
+ * the vtbl approach is faster on some processors and the same speed on others.
+ */
+.macro _speck_round_128bytes	n
+
+	// x = ror(x, 8)
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+
+	// x += y
+	vadd.u\n	X0, Y0
+	vadd.u\n	X1, Y1
+	vadd.u\n	X2, Y2
+	vadd.u\n	X3, Y3
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// y = rol(y, 3)
+	vshl.u\n	TMP0, Y0, #3
+	vshl.u\n	TMP1, Y1, #3
+	vshl.u\n	TMP2, Y2, #3
+	vshl.u\n	TMP3, Y3, #3
+	vsri.u\n	TMP0, Y0, #(\n - 3)
+	vsri.u\n	TMP1, Y1, #(\n - 3)
+	vsri.u\n	TMP2, Y2, #(\n - 3)
+	vsri.u\n	TMP3, Y3, #(\n - 3)
+
+	// y ^= x
+	veor		Y0, TMP0, X0
+	veor		Y1, TMP1, X1
+	veor		Y2, TMP2, X2
+	veor		Y3, TMP3, X3
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n
+
+	// y ^= x
+	veor		TMP0, Y0, X0
+	veor		TMP1, Y1, X1
+	veor		TMP2, Y2, X2
+	veor		TMP3, Y3, X3
+
+	// y = ror(y, 3)
+	vshr.u\n	Y0, TMP0, #3
+	vshr.u\n	Y1, TMP1, #3
+	vshr.u\n	Y2, TMP2, #3
+	vshr.u\n	Y3, TMP3, #3
+	vsli.u\n	Y0, TMP0, #(\n - 3)
+	vsli.u\n	Y1, TMP1, #(\n - 3)
+	vsli.u\n	Y2, TMP2, #(\n - 3)
+	vsli.u\n	Y3, TMP3, #(\n - 3)
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// x -= y
+	vsub.u\n	X0, Y0
+	vsub.u\n	X1, Y1
+	vsub.u\n	X2, Y2
+	vsub.u\n	X3, Y3
+
+	// x = rol(x, 8);
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+.endm
+
+.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
+
+	// Load the next source block
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current tweak in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next source block with the current tweak
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #63
+	vshl.u64	TWEAKV, #1
+	veor		TWEAKV_H, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV_L, \tmp\()_H
+.endm
+
+.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
+
+	// Load the next two source blocks
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current two tweaks in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next two source blocks with the current two tweaks
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #62
+	vshl.u64	TWEAKV, #2
+	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV, \tmp
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, decrypting
+	push		{r4-r7}
+	mov		r7, sp
+
+	/*
+	 * The first four parameters were passed in registers r0-r3.  Load the
+	 * additional parameters, which were passed on the stack.
+	 */
+	ldr		NBYTES, [sp, #16]
+	ldr		TWEAK, [sp, #20]
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
+	sub		ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
+	sub		ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for vtbl-based 8-bit rotates
+.if \decrypting
+	ldr		r12, =.Lrol\n\()_8_table
+.else
+	ldr		r12, =.Lror\n\()_8_table
+.endif
+	vld1.8		{ROTATE_TABLE}, [r12:64]
+
+	// One-time XTS preparation
+
+	/*
+	 * Allocate stack space to store 128 bytes worth of tweaks.  For
+	 * performance, this space is aligned to a 16-byte boundary so that we
+	 * can use the load/store instructions that declare 16-byte alignment.
+	 */
+	sub		sp, #128
+	bic		sp, #0xf
+
+.if \n == 64
+	// Load first tweak
+	vld1.8		{TWEAKV}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		r12, =.Lgf128mul_table
+	vld1.8		{GF128MUL_TABLE}, [r12:64]
+.else
+	// Load first tweak
+	vld1.8		{TWEAKV_L}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		r12, =.Lgf64mul_table
+	vld1.8		{GF64MUL_TABLE}, [r12:64]
+
+	// Calculate second tweak, packing it together with the first
+	vshr.u64	TMP0_L, TWEAKV_L, #63
+	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
+	vshl.u64	TWEAKV_H, TWEAKV_L, #1
+	veor		TWEAKV_H, TMP0_L
+.endif
+
+.Lnext_128bytes_\@:
+
+	/*
+	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
+	 * values, and save the tweaks on the stack for later.  Then
+	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	mov		r12, sp
+.if \n == 64
+	_xts128_precrypt_one	X0, r12, TMP0
+	_xts128_precrypt_one	Y0, r12, TMP0
+	_xts128_precrypt_one	X1, r12, TMP0
+	_xts128_precrypt_one	Y1, r12, TMP0
+	_xts128_precrypt_one	X2, r12, TMP0
+	_xts128_precrypt_one	Y2, r12, TMP0
+	_xts128_precrypt_one	X3, r12, TMP0
+	_xts128_precrypt_one	Y3, r12, TMP0
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	_xts64_precrypt_two	X0, r12, TMP0
+	_xts64_precrypt_two	Y0, r12, TMP0
+	_xts64_precrypt_two	X1, r12, TMP0
+	_xts64_precrypt_two	Y1, r12, TMP0
+	_xts64_precrypt_two	X2, r12, TMP0
+	_xts64_precrypt_two	Y2, r12, TMP0
+	_xts64_precrypt_two	X3, r12, TMP0
+	_xts64_precrypt_two	Y3, r12, TMP0
+	vuzp.32		Y0, X0
+	vuzp.32		Y1, X1
+	vuzp.32		Y2, X2
+	vuzp.32		Y3, X3
+.endif
+
+	// Do the cipher rounds
+
+	mov		r12, ROUND_KEYS
+	mov		r6, NROUNDS
+
+.Lnext_round_\@:
+.if \decrypting
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]
+	sub		r12, #8
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
+	sub		r12, #4
+.endif
+	_speck_unround_128bytes	\n
+.else
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]!
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
+.endif
+	_speck_round_128bytes	\n
+.endif
+	subs		r6, r6, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+.if \n == 64
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	vzip.32		Y0, X0
+	vzip.32		Y1, X1
+	vzip.32		Y2, X2
+	vzip.32		Y3, X3
+.endif
+
+	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
+	mov		r12, sp
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X0, TMP0
+	veor		Y0, TMP1
+	veor		X1, TMP2
+	veor		Y1, TMP3
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X2, TMP0
+	veor		Y2, TMP1
+	veor		X3, TMP2
+	veor		Y3, TMP3
+
+	// Store the ciphertext in the destination buffer
+	vst1.8		{X0, Y0}, [DST]!
+	vst1.8		{X1, Y1}, [DST]!
+	vst1.8		{X2, Y2}, [DST]!
+	vst1.8		{X3, Y3}, [DST]!
+
+	// Continue if there are more 128-byte chunks remaining, else return
+	subs		NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak
+.if \n == 64
+	vst1.8		{TWEAKV}, [TWEAK]
+.else
+	vst1.8		{TWEAKV_L}, [TWEAK]
+.endif
+
+	mov		sp, r7
+	pop		{r4-r7}
+	bx		lr
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c
new file mode 100644
index 000000000000..f012c3ea998f
--- /dev/null
+++ b/arch/arm/crypto/speck-neon-glue.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
+ * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
+ * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
+ * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
+ * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
+ * OCB and PMAC"), represented as 0x1B.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 1070044f5c3f..0bd530702118 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -35,6 +35,18 @@
 #define ICC_IGRPEN1			__ACCESS_CP15(c12, 0, c12, 7)
 #define ICC_BPR1			__ACCESS_CP15(c12, 0, c12, 3)
 
+#define __ICC_AP0Rx(x)			__ACCESS_CP15(c12, 0, c8, 4 | x)
+#define ICC_AP0R0			__ICC_AP0Rx(0)
+#define ICC_AP0R1			__ICC_AP0Rx(1)
+#define ICC_AP0R2			__ICC_AP0Rx(2)
+#define ICC_AP0R3			__ICC_AP0Rx(3)
+
+#define __ICC_AP1Rx(x)			__ACCESS_CP15(c12, 0, c9, x)
+#define ICC_AP1R0			__ICC_AP1Rx(0)
+#define ICC_AP1R1			__ICC_AP1Rx(1)
+#define ICC_AP1R2			__ICC_AP1Rx(2)
+#define ICC_AP1R3			__ICC_AP1Rx(3)
+
 #define ICC_HSRE			__ACCESS_CP15(c12, 4, c9, 5)
 
 #define ICH_VSEIR			__ACCESS_CP15(c12, 4, c9, 4)
@@ -86,17 +98,17 @@
 #define ICH_LRC14			__LRC8(6)
 #define ICH_LRC15			__LRC8(7)
 
-#define __AP0Rx(x)			__ACCESS_CP15(c12, 4, c8, x)
-#define ICH_AP0R0			__AP0Rx(0)
-#define ICH_AP0R1			__AP0Rx(1)
-#define ICH_AP0R2			__AP0Rx(2)
-#define ICH_AP0R3			__AP0Rx(3)
+#define __ICH_AP0Rx(x)			__ACCESS_CP15(c12, 4, c8, x)
+#define ICH_AP0R0			__ICH_AP0Rx(0)
+#define ICH_AP0R1			__ICH_AP0Rx(1)
+#define ICH_AP0R2			__ICH_AP0Rx(2)
+#define ICH_AP0R3			__ICH_AP0Rx(3)
 
-#define __AP1Rx(x)			__ACCESS_CP15(c12, 4, c9, x)
-#define ICH_AP1R0			__AP1Rx(0)
-#define ICH_AP1R1			__AP1Rx(1)
-#define ICH_AP1R2			__AP1Rx(2)
-#define ICH_AP1R3			__AP1Rx(3)
+#define __ICH_AP1Rx(x)			__ACCESS_CP15(c12, 4, c9, x)
+#define ICH_AP1R0			__ICH_AP1Rx(0)
+#define ICH_AP1R1			__ICH_AP1Rx(1)
+#define ICH_AP1R2			__ICH_AP1Rx(2)
+#define ICH_AP1R3			__ICH_AP1Rx(3)
 
 /* A32-to-A64 mappings used by VGIC save/restore */
 
@@ -125,6 +137,16 @@ static inline u64 read_ ## a64(void)		\
 	return val; 				\
 }
 
+CPUIF_MAP(ICC_PMR, ICC_PMR_EL1)
+CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1)
+CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1)
+CPUIF_MAP(ICC_AP0R2, ICC_AP0R2_EL1)
+CPUIF_MAP(ICC_AP0R3, ICC_AP0R3_EL1)
+CPUIF_MAP(ICC_AP1R0, ICC_AP1R0_EL1)
+CPUIF_MAP(ICC_AP1R1, ICC_AP1R1_EL1)
+CPUIF_MAP(ICC_AP1R2, ICC_AP1R2_EL1)
+CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1)
+
 CPUIF_MAP(ICH_HCR, ICH_HCR_EL2)
 CPUIF_MAP(ICH_VTR, ICH_VTR_EL2)
 CPUIF_MAP(ICH_MISR, ICH_MISR_EL2)
@@ -185,11 +207,6 @@ static inline u32 gic_read_iar(void)
 	return irqstat;
 }
 
-static inline void gic_write_pmr(u32 val)
-{
-	write_sysreg(val, ICC_PMR);
-}
-
 static inline void gic_write_ctlr(u32 val)
 {
 	write_sysreg(val, ICC_CTLR);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7381eeb7ef8e..177be0d1d090 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -132,6 +132,7 @@ config ARM64
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
+	select MULTI_IRQ_HANDLER
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
@@ -275,6 +276,9 @@ config ARCH_SUPPORTS_UPROBES
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
 
+config MULTI_IRQ_HANDLER
+	def_bool y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -455,12 +459,26 @@ config ARM64_ERRATUM_845719
 config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	default y
-	select ARM64_MODULE_CMODEL_LARGE if MODULES
+	select ARM64_MODULE_PLTS if MODULES
 	help
 	  This option links the kernel with '--fix-cortex-a53-843419' and
-	  builds modules using the large memory model in order to avoid the use
-	  of the ADRP instruction, which can cause a subsequent memory access
-	  to use an incorrect address on Cortex-A53 parts up to r0p4.
+	  enables PLT support to replace certain ADRP instructions, which can
+	  cause subsequent memory accesses to use an incorrect address on
+	  Cortex-A53 parts up to r0p4.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_1024718
+	bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update"
+	default y
+	help
+	  This option adds work around for Arm Cortex-A55 Erratum 1024718.
+
+	  Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
+	  update of the hardware dirty bit when the DBM/AP bits are updated
+	  without a break-before-make. The work around is to disable the usage
+	  of hardware DBM locally on the affected cores. CPUs not affected by
+	  erratum will continue to use the feature.
 
 	  If unsure, say Y.
 
@@ -1104,12 +1122,25 @@ config ARM64_SVE
 
 	  To enable use of this extension on CPUs that implement it, say Y.
 
-config ARM64_MODULE_CMODEL_LARGE
-	bool
+	  Note that for architectural reasons, firmware _must_ implement SVE
+	  support when running on SVE capable hardware.  The required support
+	  is present in:
+
+	    * version 1.5 and later of the ARM Trusted Firmware
+	    * the AArch64 boot wrapper since commit 5e1261e08abf
+	      ("bootwrapper: SVE: Enable SVE for EL2 and below").
+
+	  For other firmware implementations, consult the firmware documentation
+	  or vendor.
+
+	  If you need the kernel to boot on SVE-capable hardware with broken
+	  firmware, you may need to say N here until you get your firmware
+	  fixed.  Otherwise, you may experience firmware panics or lockups when
+	  booting the kernel.  If unsure and you are not observing these
+	  symptoms, you should assume that it is safe to say Y.
 
 config ARM64_MODULE_PLTS
 	bool
-	select ARM64_MODULE_CMODEL_LARGE
 	select HAVE_MOD_ARCH_SPECIFIC
 
 config RELOCATABLE
@@ -1143,12 +1174,12 @@ config RANDOMIZE_BASE
 	  If unsure, say N.
 
 config RANDOMIZE_MODULE_REGION_FULL
-	bool "Randomize the module region independently from the core kernel"
+	bool "Randomize the module region over a 4 GB range"
 	depends on RANDOMIZE_BASE
 	default y
 	help
-	  Randomizes the location of the module region without considering the
-	  location of the core kernel. This way, it is impossible for modules
+	  Randomizes the location of the module region inside a 4 GB window
+	  covering the core kernel. This way, it is less likely for modules
 	  to leak information about the location of core kernel data structures
 	  but it does imply that function calls between modules and the core
 	  kernel will need to be resolved via veneers in the module PLT.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b481b4a7c011..15402861bb59 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,6 @@ endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
@@ -77,10 +76,6 @@ endif
 
 CHECKFLAGS	+= -D__aarch64__ -m64
 
-ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
-KBUILD_CFLAGS_MODULE	+= -mcmodel=large
-endif
-
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
 endif
@@ -97,12 +92,14 @@ else
 TEXT_OFFSET := 0x00080000
 endif
 
-# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
+#				 - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT))
 # in 32-bit arithmetic
+KASAN_SHADOW_SCALE_SHIFT := 3
 KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
-			(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
-			+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
-			- (1 << (64 - 32 - 3)) )) )
+	(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+	+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) \
+	- (1 << (64 - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) )) )
 
 export	TEXT_OFFSET GZFLAGS
 
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 285c36c7b408..cb5a243110c4 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -113,4 +113,10 @@ config CRYPTO_AES_ARM64_BS
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index cee9b8d9830b..8df9f326f449 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -53,20 +53,21 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
 aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 
-AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
-AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
-
 CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
@@ -75,5 +76,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index a1254036f2b1..68b11aa690e4 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -107,11 +107,13 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 }
 
 static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
-			   u32 abytes, u32 *macp, bool use_neon)
+			   u32 abytes, u32 *macp)
 {
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
+		kernel_neon_begin();
 		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
 				     num_rounds(key));
+		kernel_neon_end();
 	} else {
 		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
 			int added = min(abytes, AES_BLOCK_SIZE - *macp);
@@ -143,8 +145,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 	}
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
-				   bool use_neon)
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -163,7 +164,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 		ltag.len = 6;
 	}
 
-	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp);
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -175,7 +176,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
+		ccm_update_mac(ctx, mac, p, n, &macp);
 		len -= n;
 
 		scatterwalk_unmap(p);
@@ -242,43 +243,42 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
-	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 			if (walk.nbytes == walk.total)
 				tail = 0;
 
+			kernel_neon_begin();
 			ce_aes_ccm_encrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
 	}
@@ -301,43 +301,42 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
-	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 			if (walk.nbytes == walk.total)
 				tail = 0;
 
+			kernel_neon_begin();
 			ce_aes_ccm_decrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
 	}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 2fa850e86aa8..253188fb8cb0 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -64,17 +64,17 @@ MODULE_LICENSE("GPL v2");
 
 /* defined in aes-modes.S */
 asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 
 asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 ctr[], int first);
+				int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
 				int rounds, int blocks, u8 const rk2[], u8 iv[],
@@ -133,19 +133,19 @@ static int ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, first);
+				(u8 *)ctx->key_enc, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -153,19 +153,19 @@ static int ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, first);
+				(u8 *)ctx->key_dec, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -173,20 +173,19 @@ static int cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -194,20 +193,19 @@ static int cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -215,20 +213,18 @@ static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	first = 1;
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
+		kernel_neon_end();
 	}
 	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
@@ -241,12 +237,13 @@ static int ctr_encrypt(struct skcipher_request *req)
 		 */
 		blocks = -1;
 
+		kernel_neon_begin();
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
-				blocks, walk.iv, first);
+				blocks, walk.iv);
+		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -270,16 +267,16 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -292,16 +289,16 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -425,7 +422,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
 	/* encrypt the zero vector */
 	kernel_neon_begin();
-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
 	kernel_neon_end();
 
 	cmac_gf128_mul_by_x(consts, consts);
@@ -454,8 +451,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 		return err;
 
 	kernel_neon_begin();
-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
+	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
 	kernel_neon_end();
 
 	return cbcmac_setkey(tfm, key, sizeof(key));
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 2674d43d1384..a68412e1e3a4 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -13,127 +13,39 @@
 	.text
 	.align		4
 
-/*
- * There are several ways to instantiate this code:
- * - no interleave, all inline
- * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
- * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
- * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
- * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
- *
- * Macros imported by this code:
- * - enc_prepare	- setup NEON registers for encryption
- * - dec_prepare	- setup NEON registers for decryption
- * - enc_switch_key	- change to new key after having prepared for encryption
- * - encrypt_block	- encrypt a single block
- * - decrypt block	- decrypt a single block
- * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
- * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
- */
-
-#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
-#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
-#define FRAME_POP	ldp x29, x30, [sp],#16
-
-#if INTERLEAVE == 2
-
-aes_encrypt_block2x:
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_encrypt_block2x)
-
-aes_decrypt_block2x:
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_decrypt_block2x)
-
-#elif INTERLEAVE == 4
-
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_encrypt_block4x)
 
 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_decrypt_block4x)
 
-#else
-#error INTERLEAVE should equal 2 or 4
-#endif
-
-	.macro		do_encrypt_block2x
-	bl		aes_encrypt_block2x
-	.endm
-
-	.macro		do_decrypt_block2x
-	bl		aes_decrypt_block2x
-	.endm
-
-	.macro		do_encrypt_block4x
-	bl		aes_encrypt_block4x
-	.endm
-
-	.macro		do_decrypt_block4x
-	bl		aes_decrypt_block4x
-	.endm
-
-#else
-#define FRAME_PUSH
-#define FRAME_POP
-
-	.macro		do_encrypt_block2x
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block2x
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_encrypt_block4x
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block4x
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-#endif
-
 	/*
 	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 */
 
 AES_ENTRY(aes_ecb_encrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	enc_prepare	w3, x2, x5
 
 .LecbencloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	do_encrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbencloopNx
 .Lecbenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbencout
-#endif
 .Lecbencloop:
 	ld1		{v0.16b}, [x1], #16		/* get next pt block */
 	encrypt_block	v0, w3, x2, x5, w6
@@ -141,35 +53,27 @@ AES_ENTRY(aes_ecb_encrypt)
 	subs		w4, w4, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_encrypt)
 
 
 AES_ENTRY(aes_ecb_decrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	dec_prepare	w3, x2, x5
 
 .LecbdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	do_decrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbdecloopNx
 .Lecbdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbdecout
-#endif
 .Lecbdecloop:
 	ld1		{v0.16b}, [x1], #16		/* get next ct block */
 	decrypt_block	v0, w3, x2, x5, w6
@@ -177,62 +81,68 @@ AES_ENTRY(aes_ecb_decrypt)
 	subs		w4, w4, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_decrypt)
 
 
 	/*
 	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 */
 
 AES_ENTRY(aes_cbc_encrypt)
-	cbz		w6, .Lcbcencloop
-
-	ld1		{v0.16b}, [x5]			/* get iv */
+	ld1		{v4.16b}, [x5]			/* get iv */
 	enc_prepare	w3, x2, x6
 
-.Lcbcencloop:
-	ld1		{v1.16b}, [x1], #16		/* get next pt block */
-	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
+.Lcbcencloop4x:
+	subs		w4, w4, #4
+	bmi		.Lcbcenc1x
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
 	encrypt_block	v0, w3, x2, x6, w7
-	st1		{v0.16b}, [x0], #16
+	eor		v1.16b, v1.16b, v0.16b
+	encrypt_block	v1, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v1.16b
+	encrypt_block	v2, w3, x2, x6, w7
+	eor		v3.16b, v3.16b, v2.16b
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v3.16b
+	b		.Lcbcencloop4x
+.Lcbcenc1x:
+	adds		w4, w4, #4
+	beq		.Lcbcencout
+.Lcbcencloop:
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16
 	subs		w4, w4, #1
 	bne		.Lcbcencloop
-	st1		{v0.16b}, [x5]			/* return iv */
+.Lcbcencout:
+	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 
 
 AES_ENTRY(aes_cbc_decrypt)
-	FRAME_PUSH
-	cbz		w6, .LcbcdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v7.16b}, [x5]			/* get iv */
 	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	mov		v2.16b, v0.16b
-	mov		v3.16b, v1.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v7.16b
-	eor		v1.16b, v1.16b, v2.16b
-	mov		v7.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
@@ -240,12 +150,10 @@ AES_ENTRY(aes_cbc_decrypt)
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
-#endif
 .Lcbcdecloop:
 	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
@@ -256,49 +164,33 @@ AES_ENTRY(aes_cbc_decrypt)
 	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	FRAME_POP
 	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 
 
 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 ctr[], int first)
+	 *		   int blocks, u8 ctr[])
 	 */
 
 AES_ENTRY(aes_ctr_encrypt)
-	FRAME_PUSH
-	cbz		w6, .Lctrnotfirst	/* 1st time around? */
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
 	enc_prepare	w3, x2, x6
 	ld1		{v4.16b}, [x5]
 
-.Lctrnotfirst:
-	umov		x8, v4.d[1]		/* keep swabbed ctr in reg */
-	rev		x8, x8
-#if INTERLEAVE >= 2
-	cmn		w8, w4			/* 32 bit overflow? */
+	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x6, x6
+	cmn		w6, w4			/* 32 bit overflow? */
 	bcs		.Lctrloop
 .LctrloopNx:
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lctr1x
-#if INTERLEAVE == 2
-	mov		v0.8b, v4.8b
-	mov		v1.8b, v4.8b
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v0.d[1], x7
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v1.d[1], x7
-	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v2.16b
-	eor		v1.16b, v1.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w8
+	dup		v7.4s, w6
 	mov		v0.16b, v4.16b
 	add		v7.4s, v7.4s, v8.4s
 	mov		v1.16b, v4.16b
@@ -309,29 +201,27 @@ AES_ENTRY(aes_ctr_encrypt)
 	mov		v2.s[3], v8.s[1]
 	mov		v3.s[3], v8.s[2]
 	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
 	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-	add		x8, x8, #INTERLEAVE
-#endif
-	rev		x7, x8
+	add		x6, x6, #4
+	rev		x7, x6
 	ins		v4.d[1], x7
 	cbz		w4, .Lctrout
 	b		.LctrloopNx
 .Lctr1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lctrout
-#endif
 .Lctrloop:
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7
 
-	adds		x8, x8, #1		/* increment BE ctr */
-	rev		x7, x8
+	adds		x6, x6, #1		/* increment BE ctr */
+	rev		x7, x6
 	ins		v4.d[1], x7
 	bcs		.Lctrcarry		/* overflow? */
 
@@ -345,12 +235,12 @@ AES_ENTRY(aes_ctr_encrypt)
 
 .Lctrout:
 	st1		{v4.16b}, [x5]		/* return next CTR value */
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 
 .Lctrtailblock:
 	st1		{v0.16b}, [x0]
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 
 .Lctrcarry:
@@ -384,39 +274,26 @@ CPU_LE(	.quad		1, 0x87		)
 CPU_BE(	.quad		0x87, 1		)
 
 AES_ENTRY(aes_xts_encrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	enc_switch_key	w3, x2, x6
+	cbz		w7, .Lxtsencnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	enc_switch_key	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx
 
+.Lxtsencnotfirst:
+	enc_prepare	w3, x2, x8
 .LxtsencloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsencNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsencoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsencNx
-.LxtsencoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsencout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -425,7 +302,7 @@ AES_ENTRY(aes_xts_encrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -434,15 +311,13 @@ AES_ENTRY(aes_xts_encrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsencout
 	b		.LxtsencloopNx
-#endif
 .Lxtsenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsencout
-#endif
 .Lxtsencloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -450,45 +325,33 @@ AES_ENTRY(aes_xts_encrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsencloop
 .Lxtsencout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_encrypt)
 
 
 AES_ENTRY(aes_xts_decrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	dec_prepare	w3, x2, x6
+	cbz		w7, .Lxtsdecnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	dec_prepare	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx
 
+.Lxtsdecnotfirst:
+	dec_prepare	w3, x2, x8
 .LxtsdecloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsdecNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsdecoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsdecNx
-.LxtsdecoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsdecout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -497,7 +360,7 @@ AES_ENTRY(aes_xts_decrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -506,15 +369,13 @@ AES_ENTRY(aes_xts_decrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsdecout
 	b		.LxtsdecloopNx
-#endif
 .Lxtsdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsdecout
-#endif
 .Lxtsdecloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w3, x2, x6, w7
+	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -522,7 +383,8 @@ AES_ENTRY(aes_xts_decrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_decrypt)
 
@@ -533,8 +395,28 @@ AES_ENDPROC(aes_xts_decrypt)
 AES_ENTRY(aes_mac_update)
 	ld1		{v0.16b}, [x4]			/* get dg */
 	enc_prepare	w2, x1, x7
-	cbnz		w5, .Lmacenc
+	cbz		w5, .Lmacloop4x
+
+	encrypt_block	v0, w2, x1, x7, w8
 
+.Lmacloop4x:
+	subs		w3, w3, #4
+	bmi		.Lmac1x
+	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v2.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v3.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v4.16b
+	cmp		w3, wzr
+	csinv		x5, x6, xzr, eq
+	cbz		w5, .Lmacout
+	encrypt_block	v0, w2, x1, x7, w8
+	b		.Lmacloop4x
+.Lmac1x:
+	add		w3, w3, #4
 .Lmacloop:
 	cbz		w3, .Lmacout
 	ld1		{v1.16b}, [x0], #16		/* get next pt block */
@@ -544,7 +426,6 @@ AES_ENTRY(aes_mac_update)
 	csinv		x5, x6, xzr, eq
 	cbz		w5, .Lmacout
 
-.Lmacenc:
 	encrypt_block	v0, w2, x1, x7, w8
 	b		.Lmacloop
 
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index c55d68ccb89f..e7a95a566462 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -46,10 +46,9 @@ asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
 
 /* borrowed from aes-neon-blk.ko */
 asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, int first);
+				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, u8 iv[],
-				     int first);
+				     int rounds, int blocks, u8 iv[]);
 
 struct aesbs_ctx {
 	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32];
@@ -100,9 +99,8 @@ static int __ecb_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -110,12 +108,13 @@ static int __ecb_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
 		   ctx->rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -157,22 +156,21 @@ static int cbc_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	int err, first = 1;
+	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
 		/* fall back to the non-bitsliced NEON implementation */
+		kernel_neon_begin();
 		neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				     ctx->enc, ctx->key.rounds, blocks, walk.iv,
-				     first);
+				     ctx->enc, ctx->key.rounds, blocks,
+				     walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -183,9 +181,8 @@ static int cbc_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -193,13 +190,14 @@ static int cbc_decrypt(struct skcipher_request *req)
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->key.rk, ctx->key.rounds, blocks,
 				  walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -231,9 +229,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -244,8 +241,10 @@ static int ctr_encrypt(struct skcipher_request *req)
 			final = NULL;
 		}
 
+		kernel_neon_begin();
 		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		kernel_neon_end();
 
 		if (final) {
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -260,8 +259,6 @@ static int ctr_encrypt(struct skcipher_request *req)
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }
 
@@ -306,12 +303,11 @@ static int __xts_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	kernel_neon_begin();
-
-	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
-			     ctx->key.rounds, 1, 1);
+	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
+	kernel_neon_end();
 
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -320,13 +316,13 @@ static int __xts_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
 		   ctx->key.rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }
 
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
index cbdb75d15cd0..727579c93ded 100644
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ b/arch/arm64/crypto/chacha20-neon-glue.c
@@ -37,12 +37,19 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 	u8 buf[CHACHA20_BLOCK_SIZE];
 
 	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+		kernel_neon_begin();
 		chacha20_4block_xor_neon(state, dst, src);
+		kernel_neon_end();
 		bytes -= CHACHA20_BLOCK_SIZE * 4;
 		src += CHACHA20_BLOCK_SIZE * 4;
 		dst += CHACHA20_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
+
+	if (!bytes)
+		return;
+
+	kernel_neon_begin();
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block_xor_neon(state, dst, src);
 		bytes -= CHACHA20_BLOCK_SIZE;
@@ -55,6 +62,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 		chacha20_block_xor_neon(state, buf, buf);
 		memcpy(dst, buf, bytes);
 	}
+	kernel_neon_end();
 }
 
 static int chacha20_neon(struct skcipher_request *req)
@@ -68,11 +76,10 @@ static int chacha20_neon(struct skcipher_request *req)
 	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	crypto_chacha20_init(state, ctx, walk.iv);
 
-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
 
@@ -83,7 +90,6 @@ static int chacha20_neon(struct skcipher_request *req)
 				nbytes);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
-	kernel_neon_end();
 
 	return err;
 }
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index b064d925fe2a..e8880ccdc71f 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -89,21 +89,32 @@ static struct shash_alg algs[] = { {
 static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
 			      unsigned int len)
 {
-	/*
-	 * Stacking and unstacking a substantial slice of the NEON register
-	 * file may significantly affect performance for small updates when
-	 * executing in interrupt context, so fall back to the scalar code
-	 * in that case.
-	 */
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
 	if (!may_use_simd())
 		return sha256_base_do_update(desc, data, len,
 				(sha256_block_fn *)sha256_block_data_order);
 
-	kernel_neon_begin();
-	sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
-	kernel_neon_end();
+	while (len > 0) {
+		unsigned int chunk = len;
+
+		/*
+		 * Don't hog the CPU for the entire time it takes to process all
+		 * input when running on a preemptible kernel, but process the
+		 * data block by block instead.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT) &&
+		    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
+			chunk = SHA256_BLOCK_SIZE -
+				sctx->count % SHA256_BLOCK_SIZE;
 
+		kernel_neon_begin();
+		sha256_base_do_update(desc, data, chunk,
+				      (sha256_block_fn *)sha256_block_neon);
+		kernel_neon_end();
+		data += chunk;
+		len -= chunk;
+	}
 	return 0;
 }
 
@@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_data_order);
 	} else {
-		kernel_neon_begin();
 		if (len)
-			sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
+			sha256_update_neon(desc, data, len);
+		kernel_neon_begin();
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_neon);
 		kernel_neon_end();
diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S
new file mode 100644
index 000000000000..b14463438b09
--- /dev/null
+++ b/arch/arm64/crypto/speck-neon-core.S
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+
+	// arguments
+	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
+	NROUNDS		.req	w1	// int nrounds
+	NROUNDS_X	.req	x1
+	DST		.req	x2	// void *dst
+	SRC		.req	x3	// const void *src
+	NBYTES		.req	w4	// unsigned int nbytes
+	TWEAK		.req	x5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	// (underscores avoid a naming collision with ARM64 registers x0-x3)
+	X_0		.req	v0
+	Y_0		.req	v1
+	X_1		.req	v2
+	Y_1		.req	v3
+	X_2		.req	v4
+	Y_2		.req	v5
+	X_3		.req	v6
+	Y_3		.req	v7
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	v8
+
+	// index vector for tbl-based 8-bit rotates
+	ROTATE_TABLE	.req	v9
+	ROTATE_TABLE_Q	.req	q9
+
+	// temporary registers
+	TMP0		.req	v10
+	TMP1		.req	v11
+	TMP2		.req	v12
+	TMP3		.req	v13
+
+	// multiplication table for updating XTS tweaks
+	GFMUL_TABLE	.req	v14
+	GFMUL_TABLE_Q	.req	q14
+
+	// next XTS tweak value(s)
+	TWEAKV_NEXT	.req	v15
+
+	// XTS tweaks for the blocks currently being encrypted/decrypted
+	TWEAKV0		.req	v16
+	TWEAKV1		.req	v17
+	TWEAKV2		.req	v18
+	TWEAKV3		.req	v19
+	TWEAKV4		.req	v20
+	TWEAKV5		.req	v21
+	TWEAKV6		.req	v22
+	TWEAKV7		.req	v23
+
+	.align		4
+.Lror64_8_table:
+	.octa		0x080f0e0d0c0b0a090007060504030201
+.Lror32_8_table:
+	.octa		0x0c0f0e0d080b0a090407060500030201
+.Lrol64_8_table:
+	.octa		0x0e0d0c0b0a09080f0605040302010007
+.Lrol32_8_table:
+	.octa		0x0e0d0c0f0a09080b0605040702010003
+.Lgf128mul_table:
+	.octa		0x00000000000000870000000000000001
+.Lgf64mul_table:
+	.octa		0x0000000000000000000000002d361b00
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
+ */
+.macro _speck_round_128bytes	n, lanes
+
+	// x = ror(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+
+	// x += y
+	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// y = rol(y, 3)
+	shl		TMP0.\lanes, Y_0.\lanes, #3
+	shl		TMP1.\lanes, Y_1.\lanes, #3
+	shl		TMP2.\lanes, Y_2.\lanes, #3
+	shl		TMP3.\lanes, Y_3.\lanes, #3
+	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
+	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
+	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
+	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
+
+	// y ^= x
+	eor		Y_0.16b, TMP0.16b, X_0.16b
+	eor		Y_1.16b, TMP1.16b, X_1.16b
+	eor		Y_2.16b, TMP2.16b, X_2.16b
+	eor		Y_3.16b, TMP3.16b, X_3.16b
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n, lanes
+
+	// y ^= x
+	eor		TMP0.16b, Y_0.16b, X_0.16b
+	eor		TMP1.16b, Y_1.16b, X_1.16b
+	eor		TMP2.16b, Y_2.16b, X_2.16b
+	eor		TMP3.16b, Y_3.16b, X_3.16b
+
+	// y = ror(y, 3)
+	ushr		Y_0.\lanes, TMP0.\lanes, #3
+	ushr		Y_1.\lanes, TMP1.\lanes, #3
+	ushr		Y_2.\lanes, TMP2.\lanes, #3
+	ushr		Y_3.\lanes, TMP3.\lanes, #3
+	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
+	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
+	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
+	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// x -= y
+	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x = rol(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+.endm
+
+.macro _next_xts_tweak	next, cur, tmp, n
+.if \n == 64
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	sshr		\tmp\().2d, \cur\().2d, #63
+	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
+	shl		\next\().2d, \cur\().2d, #1
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.else
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	ushr		\tmp\().2d, \cur\().2d, #62
+	shl		\next\().2d, \cur\().2d, #2
+	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.endif
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, lanes, decrypting
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
+	sub		ROUND_KEYS, ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
+	sub		ROUND_KEYS, ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for tbl-based 8-bit rotates
+.if \decrypting
+	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
+.else
+	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
+.endif
+
+	// One-time XTS preparation
+.if \n == 64
+	// Load first tweak
+	ld1		{TWEAKV0.16b}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
+.else
+	// Load first tweak
+	ld1		{TWEAKV0.8b}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
+
+	// Calculate second tweak, packing it together with the first
+	ushr		TMP0.2d, TWEAKV0.2d, #63
+	shl		TMP1.2d, TWEAKV0.2d, #1
+	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
+	eor		TMP0.8b, TMP0.8b, TMP1.8b
+	mov		TWEAKV0.d[1], TMP0.d[0]
+.endif
+
+.Lnext_128bytes_\@:
+
+	// Calculate XTS tweaks for next 128 bytes
+	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
+	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
+	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
+	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
+	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
+	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
+	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
+	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
+
+	// Load the next source blocks into {X,Y}[0-3]
+	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
+	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
+
+	// XOR the source blocks with their XTS tweaks
+	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
+	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
+	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
+	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
+	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
+	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
+	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
+	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
+
+	/*
+	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
+	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
+
+	// Do the cipher rounds
+	mov		x6, ROUND_KEYS
+	mov		w7, NROUNDS
+.Lnext_round_\@:
+.if \decrypting
+	ld1r		{ROUND_KEY.\lanes}, [x6]
+	sub		x6, x6, #( \n / 8 )
+	_speck_unround_128bytes	\n, \lanes
+.else
+	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
+	_speck_round_128bytes	\n, \lanes
+.endif
+	subs		w7, w7, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
+	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
+	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
+	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
+	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
+	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
+	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
+	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
+
+	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
+	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
+	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
+	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
+	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
+	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
+	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
+	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
+	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
+	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
+
+	// Store the ciphertext in the destination buffer
+	st1		{X_0.16b-Y_1.16b}, [DST], #64
+	st1		{X_2.16b-Y_3.16b}, [DST], #64
+
+	// Continue if there are more 128-byte chunks remaining
+	subs		NBYTES, NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak and return
+.if \n == 64
+	st1		{TWEAKV_NEXT.16b}, [TWEAK]
+.else
+	st1		{TWEAKV_NEXT.8b}, [TWEAK]
+.endif
+	ret
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)
diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c
new file mode 100644
index 000000000000..6e233aeb4ff4
--- /dev/null
+++ b/arch/arm64/crypto/speck-neon-glue.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ * (64-bit version; based on the 32-bit version)
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 9becba9ab392..e278f94df0c9 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -76,11 +76,6 @@ static inline u64 gic_read_iar_cavium_thunderx(void)
 	return irqstat;
 }
 
-static inline void gic_write_pmr(u32 val)
-{
-	write_sysreg_s(val, SYS_ICC_PMR_EL1);
-}
-
 static inline void gic_write_ctlr(u32 val)
 {
 	write_sysreg_s(val, SYS_ICC_CTLR_EL1);
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3c78835bba94..053d83e8db6f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -202,25 +202,15 @@ lr	.req	x30		// link register
 
 /*
  * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
- * <symbol> is within the range +/- 4 GB of the PC when running
- * in core kernel context. In module context, a movz/movk sequence
- * is used, since modules may be loaded far away from the kernel
- * when KASLR is in effect.
+ * <symbol> is within the range +/- 4 GB of the PC.
  */
 	/*
 	 * @dst: destination register (64 bit wide)
 	 * @sym: name of the symbol
 	 */
 	.macro	adr_l, dst, sym
-#ifndef MODULE
 	adrp	\dst, \sym
 	add	\dst, \dst, :lo12:\sym
-#else
-	movz	\dst, #:abs_g3:\sym
-	movk	\dst, #:abs_g2_nc:\sym
-	movk	\dst, #:abs_g1_nc:\sym
-	movk	\dst, #:abs_g0_nc:\sym
-#endif
 	.endm
 
 	/*
@@ -231,7 +221,6 @@ lr	.req	x30		// link register
 	 *       the address
 	 */
 	.macro	ldr_l, dst, sym, tmp=
-#ifndef MODULE
 	.ifb	\tmp
 	adrp	\dst, \sym
 	ldr	\dst, [\dst, :lo12:\sym]
@@ -239,15 +228,6 @@ lr	.req	x30		// link register
 	adrp	\tmp, \sym
 	ldr	\dst, [\tmp, :lo12:\sym]
 	.endif
-#else
-	.ifb	\tmp
-	adr_l	\dst, \sym
-	ldr	\dst, [\dst]
-	.else
-	adr_l	\tmp, \sym
-	ldr	\dst, [\tmp]
-	.endif
-#endif
 	.endm
 
 	/*
@@ -257,28 +237,18 @@ lr	.req	x30		// link register
 	 *       while <src> needs to be preserved.
 	 */
 	.macro	str_l, src, sym, tmp
-#ifndef MODULE
 	adrp	\tmp, \sym
 	str	\src, [\tmp, :lo12:\sym]
-#else
-	adr_l	\tmp, \sym
-	str	\src, [\tmp]
-#endif
 	.endm
 
 	/*
-	 * @dst: Result of per_cpu(sym, smp_processor_id()), can be SP for
-	 *       non-module code
+	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
 	 * @sym: The name of the per-cpu variable
 	 * @tmp: scratch register
 	 */
 	.macro adr_this_cpu, dst, sym, tmp
-#ifndef MODULE
 	adrp	\tmp, \sym
 	add	\dst, \tmp, #:lo12:\sym
-#else
-	adr_l	\dst, \sym
-#endif
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	\tmp, tpidr_el1
 alternative_else
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ea9bb4e0e9bb..9bbffc7a301f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -20,8 +20,12 @@
 
 #define CTR_L1IP_SHIFT		14
 #define CTR_L1IP_MASK		3
+#define CTR_DMINLINE_SHIFT	16
+#define CTR_ERG_SHIFT		20
 #define CTR_CWG_SHIFT		24
 #define CTR_CWG_MASK		15
+#define CTR_IDC_SHIFT		28
+#define CTR_DIC_SHIFT		29
 
 #define CTR_L1IP(ctr)		(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index bef9f418f089..7dfcec4700fe 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -133,6 +133,9 @@ extern void flush_dcache_page(struct page *);
 
 static inline void __flush_icache_all(void)
 {
+	if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+		return;
+
 	asm("ic	ialluis");
 	dsb(ish);
 }
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index ae852add053d..4f5fd2a36e6e 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -18,7 +18,8 @@
 #ifndef __ASM_CMPXCHG_H
 #define __ASM_CMPXCHG_H
 
-#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
 
 #include <asm/atomic.h>
 #include <asm/barrier.h>
@@ -196,32 +197,6 @@ __CMPXCHG_GEN(_mb)
 	__ret; \
 })
 
-/* this_cpu_cmpxchg */
-#define _protect_cmpxchg_local(pcp, o, n)			\
-({								\
-	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
-	preempt_disable();					\
-	__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);	\
-	preempt_enable();					\
-	__ret;							\
-})
-
-#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
-({									\
-	int __ret;							\
-	preempt_disable();						\
-	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
-					raw_cpu_ptr(&(ptr2)),		\
-					o1, o2, n1, n2);		\
-	preempt_enable();						\
-	__ret;								\
-})
-
 #define __CMPWAIT_CASE(w, sz, name)					\
 static inline void __cmpwait_case_##name(volatile void *ptr,		\
 					 unsigned long val)		\
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index bb263820de13..21bb624e0a7a 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -45,7 +45,11 @@
 #define ARM64_HARDEN_BRANCH_PREDICTOR		24
 #define ARM64_HARDEN_BP_POST_GUEST_EXIT		25
 #define ARM64_HAS_RAS_EXTN			26
+#define ARM64_WORKAROUND_843419			27
+#define ARM64_HAS_CACHE_IDC			28
+#define ARM64_HAS_CACHE_DIC			29
+#define ARM64_HW_DBM				30
 
-#define ARM64_NCAPS				27
+#define ARM64_NCAPS				31
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 060e3a4008ab..09b0f2a80c8f 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,6 +10,7 @@
 #define __ASM_CPUFEATURE_H
 
 #include <asm/cpucaps.h>
+#include <asm/cputype.h>
 #include <asm/fpsimd.h>
 #include <asm/hwcap.h>
 #include <asm/sigcontext.h>
@@ -89,24 +90,231 @@ struct arm64_ftr_reg {
 
 extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
 
-/* scope of capability check */
-enum {
-	SCOPE_SYSTEM,
-	SCOPE_LOCAL_CPU,
-};
+/*
+ * CPU capabilities:
+ *
+ * We use arm64_cpu_capabilities to represent system features, errata work
+ * arounds (both used internally by kernel and tracked in cpu_hwcaps) and
+ * ELF HWCAPs (which are exposed to user).
+ *
+ * To support systems with heterogeneous CPUs, we need to make sure that we
+ * detect the capabilities correctly on the system and take appropriate
+ * measures to ensure there are no incompatibilities.
+ *
+ * This comment tries to explain how we treat the capabilities.
+ * Each capability has the following list of attributes :
+ *
+ * 1) Scope of Detection : The system detects a given capability by
+ *    performing some checks at runtime. This could be, e.g, checking the
+ *    value of a field in CPU ID feature register or checking the cpu
+ *    model. The capability provides a call back ( @matches() ) to
+ *    perform the check. Scope defines how the checks should be performed.
+ *    There are three cases:
+ *
+ *     a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one
+ *        matches. This implies, we have to run the check on all the
+ *        booting CPUs, until the system decides that state of the
+ *        capability is finalised. (See section 2 below)
+ *		Or
+ *     b) SCOPE_SYSTEM: check all the CPUs and "detect" if all the CPUs
+ *        matches. This implies, we run the check only once, when the
+ *        system decides to finalise the state of the capability. If the
+ *        capability relies on a field in one of the CPU ID feature
+ *        registers, we use the sanitised value of the register from the
+ *        CPU feature infrastructure to make the decision.
+ *		Or
+ *     c) SCOPE_BOOT_CPU: Check only on the primary boot CPU to detect the
+ *        feature. This category is for features that are "finalised"
+ *        (or used) by the kernel very early even before the SMP cpus
+ *        are brought up.
+ *
+ *    The process of detection is usually denoted by "update" capability
+ *    state in the code.
+ *
+ * 2) Finalise the state : The kernel should finalise the state of a
+ *    capability at some point during its execution and take necessary
+ *    actions if any. Usually, this is done, after all the boot-time
+ *    enabled CPUs are brought up by the kernel, so that it can make
+ *    better decision based on the available set of CPUs. However, there
+ *    are some special cases, where the action is taken during the early
+ *    boot by the primary boot CPU. (e.g, running the kernel at EL2 with
+ *    Virtualisation Host Extensions). The kernel usually disallows any
+ *    changes to the state of a capability once it finalises the capability
+ *    and takes any action, as it may be impossible to execute the actions
+ *    safely. A CPU brought up after a capability is "finalised" is
+ *    referred to as "Late CPU" w.r.t the capability. e.g, all secondary
+ *    CPUs are treated "late CPUs" for capabilities determined by the boot
+ *    CPU.
+ *
+ *    At the moment there are two passes of finalising the capabilities.
+ *      a) Boot CPU scope capabilities - Finalised by primary boot CPU via
+ *         setup_boot_cpu_capabilities().
+ *      b) Everything except (a) - Run via setup_system_capabilities().
+ *
+ * 3) Verification: When a CPU is brought online (e.g, by user or by the
+ *    kernel), the kernel should make sure that it is safe to use the CPU,
+ *    by verifying that the CPU is compliant with the state of the
+ *    capabilities finalised already. This happens via :
+ *
+ *	secondary_start_kernel()-> check_local_cpu_capabilities()
+ *
+ *    As explained in (2) above, capabilities could be finalised at
+ *    different points in the execution. Each newly booted CPU is verified
+ *    against the capabilities that have been finalised by the time it
+ *    boots.
+ *
+ *	a) SCOPE_BOOT_CPU : All CPUs are verified against the capability
+ *	except for the primary boot CPU.
+ *
+ *	b) SCOPE_LOCAL_CPU, SCOPE_SYSTEM: All CPUs hotplugged on by the
+ *	user after the kernel boot are verified against the capability.
+ *
+ *    If there is a conflict, the kernel takes an action, based on the
+ *    severity (e.g, a CPU could be prevented from booting or cause a
+ *    kernel panic). The CPU is allowed to "affect" the state of the
+ *    capability, if it has not been finalised already. See section 5
+ *    for more details on conflicts.
+ *
+ * 4) Action: As mentioned in (2), the kernel can take an action for each
+ *    detected capability, on all CPUs on the system. Appropriate actions
+ *    include, turning on an architectural feature, modifying the control
+ *    registers (e.g, SCTLR, TCR etc.) or patching the kernel via
+ *    alternatives. The kernel patching is batched and performed at later
+ *    point. The actions are always initiated only after the capability
+ *    is finalised. This is usally denoted by "enabling" the capability.
+ *    The actions are initiated as follows :
+ *	a) Action is triggered on all online CPUs, after the capability is
+ *	finalised, invoked within the stop_machine() context from
+ *	enable_cpu_capabilitie().
+ *
+ *	b) Any late CPU, brought up after (1), the action is triggered via:
+ *
+ *	  check_local_cpu_capabilities() -> verify_local_cpu_capabilities()
+ *
+ * 5) Conflicts: Based on the state of the capability on a late CPU vs.
+ *    the system state, we could have the following combinations :
+ *
+ *		x-----------------------------x
+ *		| Type  | System   | Late CPU |
+ *		|-----------------------------|
+ *		|  a    |   y      |    n     |
+ *		|-----------------------------|
+ *		|  b    |   n      |    y     |
+ *		x-----------------------------x
+ *
+ *     Two separate flag bits are defined to indicate whether each kind of
+ *     conflict can be allowed:
+ *		ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - Case(a) is allowed
+ *		ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - Case(b) is allowed
+ *
+ *     Case (a) is not permitted for a capability that the system requires
+ *     all CPUs to have in order for the capability to be enabled. This is
+ *     typical for capabilities that represent enhanced functionality.
+ *
+ *     Case (b) is not permitted for a capability that must be enabled
+ *     during boot if any CPU in the system requires it in order to run
+ *     safely. This is typical for erratum work arounds that cannot be
+ *     enabled after the corresponding capability is finalised.
+ *
+ *     In some non-typical cases either both (a) and (b), or neither,
+ *     should be permitted. This can be described by including neither
+ *     or both flags in the capability's type field.
+ */
+
+
+/*
+ * Decide how the capability is detected.
+ * On any local CPU vs System wide vs the primary boot CPU
+ */
+#define ARM64_CPUCAP_SCOPE_LOCAL_CPU		((u16)BIT(0))
+#define ARM64_CPUCAP_SCOPE_SYSTEM		((u16)BIT(1))
+/*
+ * The capabilitiy is detected on the Boot CPU and is used by kernel
+ * during early boot. i.e, the capability should be "detected" and
+ * "enabled" as early as possibly on all booting CPUs.
+ */
+#define ARM64_CPUCAP_SCOPE_BOOT_CPU		((u16)BIT(2))
+#define ARM64_CPUCAP_SCOPE_MASK			\
+	(ARM64_CPUCAP_SCOPE_SYSTEM	|	\
+	 ARM64_CPUCAP_SCOPE_LOCAL_CPU	|	\
+	 ARM64_CPUCAP_SCOPE_BOOT_CPU)
+
+#define SCOPE_SYSTEM				ARM64_CPUCAP_SCOPE_SYSTEM
+#define SCOPE_LOCAL_CPU				ARM64_CPUCAP_SCOPE_LOCAL_CPU
+#define SCOPE_BOOT_CPU				ARM64_CPUCAP_SCOPE_BOOT_CPU
+#define SCOPE_ALL				ARM64_CPUCAP_SCOPE_MASK
+
+/*
+ * Is it permitted for a late CPU to have this capability when system
+ * hasn't already enabled it ?
+ */
+#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU	((u16)BIT(4))
+/* Is it safe for a late CPU to miss this capability when system has it */
+#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU	((u16)BIT(5))
+
+/*
+ * CPU errata workarounds that need to be enabled at boot time if one or
+ * more CPUs in the system requires it. When one of these capabilities
+ * has been enabled, it is safe to allow any CPU to boot that doesn't
+ * require the workaround. However, it is not safe if a "late" CPU
+ * requires a workaround and the system hasn't enabled it already.
+ */
+#define ARM64_CPUCAP_LOCAL_CPU_ERRATUM		\
+	(ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time based on system-wide value of a
+ * feature. It is safe for a late CPU to have this feature even though
+ * the system hasn't enabled it, although the featuer will not be used
+ * by Linux in this case. If the system has enabled this feature already,
+ * then every late CPU must have it.
+ */
+#define ARM64_CPUCAP_SYSTEM_FEATURE	\
+	(ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+/*
+ * CPU feature detected at boot time based on feature of one or more CPUs.
+ * All possible conflicts for a late CPU are ignored.
+ */
+#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE		\
+	(ARM64_CPUCAP_SCOPE_LOCAL_CPU		|	\
+	 ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU	|	\
+	 ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU)
+
+/*
+ * CPU feature detected at boot time, on one or more CPUs. A late CPU
+ * is not allowed to have the capability when the system doesn't have it.
+ * It is Ok for a late CPU to miss the feature.
+ */
+#define ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE	\
+	(ARM64_CPUCAP_SCOPE_LOCAL_CPU		|	\
+	 ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU)
+
+/*
+ * CPU feature used early in the boot based on the boot CPU. All secondary
+ * CPUs must match the state of the capability as detected by the boot CPU.
+ */
+#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE ARM64_CPUCAP_SCOPE_BOOT_CPU
 
 struct arm64_cpu_capabilities {
 	const char *desc;
 	u16 capability;
-	int def_scope;			/* default scope */
+	u16 type;
 	bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
-	int (*enable)(void *);		/* Called on all active CPUs */
+	/*
+	 * Take the appropriate actions to enable this capability for this CPU.
+	 * For each successfully booted CPU, this method is called for each
+	 * globally detected capability.
+	 */
+	void (*cpu_enable)(const struct arm64_cpu_capabilities *cap);
 	union {
 		struct {	/* To be used for erratum handling only */
-			u32 midr_model;
-			u32 midr_range_min, midr_range_max;
+			struct midr_range midr_range;
+			const struct arm64_midr_revidr {
+				u32 midr_rv;		/* revision/variant */
+				u32 revidr_mask;
+			} * const fixed_revs;
 		};
 
+		const struct midr_range *midr_range_list;
 		struct {	/* Feature register checking */
 			u32 sys_reg;
 			u8 field_pos;
@@ -115,9 +323,38 @@ struct arm64_cpu_capabilities {
 			bool sign;
 			unsigned long hwcap;
 		};
+		/*
+		 * A list of "matches/cpu_enable" pair for the same
+		 * "capability" of the same "type" as described by the parent.
+		 * Only matches(), cpu_enable() and fields relevant to these
+		 * methods are significant in the list. The cpu_enable is
+		 * invoked only if the corresponding entry "matches()".
+		 * However, if a cpu_enable() method is associated
+		 * with multiple matches(), care should be taken that either
+		 * the match criteria are mutually exclusive, or that the
+		 * method is robust against being called multiple times.
+		 */
+		const struct arm64_cpu_capabilities *match_list;
 	};
 };
 
+static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
+{
+	return cap->type & ARM64_CPUCAP_SCOPE_MASK;
+}
+
+static inline bool
+cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
+{
+	return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU);
+}
+
+static inline bool
+cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
+{
+	return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
+}
+
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
@@ -236,15 +473,8 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
 }
 
 void __init setup_cpu_features(void);
-
-void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
-			    const char *info);
-void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps);
 void check_local_cpu_capabilities(void);
 
-void update_cpu_errata_workarounds(void);
-void __init enable_errata_workarounds(void);
-void verify_local_cpu_errata_workarounds(void);
 
 u64 read_sanitised_ftr_reg(u32 id);
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 350c76a1d15b..30014a9f8f2b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -83,6 +83,8 @@
 #define ARM_CPU_PART_CORTEX_A53		0xD03
 #define ARM_CPU_PART_CORTEX_A73		0xD09
 #define ARM_CPU_PART_CORTEX_A75		0xD0A
+#define ARM_CPU_PART_CORTEX_A35		0xD04
+#define ARM_CPU_PART_CORTEX_A55		0xD05
 
 #define APM_CPU_PART_POTENZA		0x000
 
@@ -102,6 +104,8 @@
 #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
 #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
 #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
+#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
+#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
@@ -118,6 +122,45 @@
 #define read_cpuid(reg)			read_sysreg_s(SYS_ ## reg)
 
 /*
+ * Represent a range of MIDR values for a given CPU model and a
+ * range of variant/revision values.
+ *
+ * @model	- CPU model as defined by MIDR_CPU_MODEL
+ * @rv_min	- Minimum value for the revision/variant as defined by
+ *		  MIDR_CPU_VAR_REV
+ * @rv_max	- Maximum value for the variant/revision for the range.
+ */
+struct midr_range {
+	u32 model;
+	u32 rv_min;
+	u32 rv_max;
+};
+
+#define MIDR_RANGE(m, v_min, r_min, v_max, r_max)		\
+	{							\
+		.model = m,					\
+		.rv_min = MIDR_CPU_VAR_REV(v_min, r_min),	\
+		.rv_max = MIDR_CPU_VAR_REV(v_max, r_max),	\
+	}
+
+#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
+
+static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
+{
+	return MIDR_IS_CPU_MODEL_RANGE(midr, range->model,
+				 range->rv_min, range->rv_max);
+}
+
+static inline bool
+is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
+{
+	while (ranges->model)
+		if (is_midr_in_range(midr, ranges++))
+			return true;
+	return false;
+}
+
+/*
  * The CPU ID never changes at run time, so we might as well tell the
  * compiler that it's constant.  Use this function to read the CPU ID
  * rather than directly reading processor_id or read_cpuid() directly.
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 803443d74926..ce70c3ffb993 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -240,6 +240,15 @@
 		(((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >>		\
 		 ESR_ELx_SYS64_ISS_OP2_SHIFT))
 
+/*
+ * ISS field definitions for floating-point exception traps
+ * (FP_EXC_32/FP_EXC_64).
+ *
+ * (The FPEXC_* constants are used instead for common bits.)
+ */
+
+#define ESR_ELx_FP_EXC_TFV	(UL(1) << 23)
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 8857a0f0d0f7..aa7162ae93e3 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -22,33 +22,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/cache.h>
+#include <linux/init.h>
 #include <linux/stddef.h>
 
-/*
- * FP/SIMD storage area has:
- *  - FPSR and FPCR
- *  - 32 128-bit data registers
- *
- * Note that user_fpsimd forms a prefix of this structure, which is
- * relied upon in the ptrace FP/SIMD accessors.
- */
-struct fpsimd_state {
-	union {
-		struct user_fpsimd_state user_fpsimd;
-		struct {
-			__uint128_t vregs[32];
-			u32 fpsr;
-			u32 fpcr;
-			/*
-			 * For ptrace compatibility, pad to next 128-bit
-			 * boundary here if extending this struct.
-			 */
-		};
-	};
-	/* the id of the last cpu to have restored this state */
-	unsigned int cpu;
-};
-
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
 #define VFP_FPSCR_STAT_MASK	0xf800009f
@@ -62,8 +38,8 @@ struct fpsimd_state {
 
 struct task_struct;
 
-extern void fpsimd_save_state(struct fpsimd_state *state);
-extern void fpsimd_load_state(struct fpsimd_state *state);
+extern void fpsimd_save_state(struct user_fpsimd_state *state);
+extern void fpsimd_load_state(struct user_fpsimd_state *state);
 
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
@@ -83,7 +59,9 @@ extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
-extern int sve_kernel_enable(void *);
+
+struct arm64_cpu_capabilities;
+extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 
 extern int __ro_after_init sve_max_vl;
 
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index eec95768eaad..8262325e2fc6 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -4,8 +4,11 @@
 
 #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
 
+#include <linux/compiler_types.h>
+#include <linux/export.h>
 #include <linux/stringify.h>
 #include <asm/alternative.h>
+#include <asm/cpucaps.h>
 
 #ifdef __ASSEMBLER__
 
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 4f766178fa6f..b6dbbe3123a9 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -39,6 +39,8 @@ struct mod_arch_specific {
 u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);
 
+u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val);
+
 #ifdef CONFIG_RANDOMIZE_BASE
 extern u64 module_alloc_base;
 #else
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 43393208229e..9234013e759e 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,7 +16,10 @@
 #ifndef __ASM_PERCPU_H
 #define __ASM_PERCPU_H
 
+#include <linux/preempt.h>
+
 #include <asm/alternative.h>
+#include <asm/cmpxchg.h>
 #include <asm/stack_pointer.h>
 
 static inline void set_my_cpu_offset(unsigned long off)
@@ -197,6 +200,32 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 	return ret;
 }
 
+/* this_cpu_cmpxchg */
+#define _protect_cmpxchg_local(pcp, o, n)			\
+({								\
+	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
+	preempt_disable();					\
+	__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);	\
+	preempt_enable();					\
+	__ret;							\
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
+({									\
+	int __ret;							\
+	preempt_disable();						\
+	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
+					raw_cpu_ptr(&(ptr2)),		\
+					o1, o2, n1, n2);		\
+	preempt_enable();						\
+	__ret;								\
+})
+
 #define _percpu_read(pcp)						\
 ({									\
 	typeof(pcp) __retval;						\
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index cdfe3e657a9e..fd208eac9f2a 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -291,6 +291,7 @@
 #define TCR_TBI0		(UL(1) << 37)
 #define TCR_HA			(UL(1) << 39)
 #define TCR_HD			(UL(1) << 40)
+#define TCR_NFD1		(UL(1) << 54)
 
 /*
  * TTBR.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fce604e3e599..767598932549 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -34,10 +34,12 @@
 
 #ifdef __KERNEL__
 
+#include <linux/build_bug.h>
+#include <linux/stddef.h>
 #include <linux/string.h>
 
 #include <asm/alternative.h>
-#include <asm/fpsimd.h>
+#include <asm/cpufeature.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
@@ -103,11 +105,19 @@ struct cpu_context {
 
 struct thread_struct {
 	struct cpu_context	cpu_context;	/* cpu context */
-	unsigned long		tp_value;	/* TLS register */
-#ifdef CONFIG_COMPAT
-	unsigned long		tp2_value;
-#endif
-	struct fpsimd_state	fpsimd_state;
+
+	/*
+	 * Whitelisted fields for hardened usercopy:
+	 * Maintainers must ensure manually that this contains no
+	 * implicit padding.
+	 */
+	struct {
+		unsigned long	tp_value;	/* TLS register */
+		unsigned long	tp2_value;
+		struct user_fpsimd_state fpsimd_state;
+	} uw;
+
+	unsigned int		fpsimd_cpu;
 	void			*sve_state;	/* SVE registers, if any */
 	unsigned int		sve_vl;		/* SVE vector length */
 	unsigned int		sve_vl_onexec;	/* SVE vl after next exec */
@@ -116,14 +126,17 @@ struct thread_struct {
 	struct debug_info	debug;		/* debugging */
 };
 
-/*
- * Everything usercopied to/from thread_struct is statically-sized, so
- * no hardened usercopy whitelist is needed.
- */
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
 {
-	*offset = *size = 0;
+	/* Verify that there is no padding among the whitelisted fields: */
+	BUILD_BUG_ON(sizeof_field(struct thread_struct, uw) !=
+		     sizeof_field(struct thread_struct, uw.tp_value) +
+		     sizeof_field(struct thread_struct, uw.tp2_value) +
+		     sizeof_field(struct thread_struct, uw.fpsimd_state));
+
+	*offset = offsetof(struct thread_struct, uw);
+	*size = sizeof_field(struct thread_struct, uw);
 }
 
 #ifdef CONFIG_COMPAT
@@ -131,13 +144,13 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 ({									\
 	unsigned long *__tls;						\
 	if (is_compat_thread(task_thread_info(t)))			\
-		__tls = &(t)->thread.tp2_value;				\
+		__tls = &(t)->thread.uw.tp2_value;			\
 	else								\
-		__tls = &(t)->thread.tp_value;				\
+		__tls = &(t)->thread.uw.tp_value;			\
 	__tls;								\
  })
 #else
-#define task_user_tls(t)	(&(t)->thread.tp_value)
+#define task_user_tls(t)	(&(t)->thread.uw.tp_value)
 #endif
 
 /* Sync TPIDR_EL0 back to thread_struct for current */
@@ -227,9 +240,9 @@ static inline void spin_lock_prefetch(const void *ptr)
 
 #endif
 
-int cpu_enable_pan(void *__unused);
-int cpu_enable_cache_maint_trap(void *__unused);
-int cpu_clear_disr(void *__unused);
+void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused);
+void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
+void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
 
 /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
 #define SVE_SET_VL(arg)	sve_set_current_vl(arg)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0e1960c59197..e7b9f154e476 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -490,6 +490,7 @@
 #define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS	BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0)
 
 /* id_aa64isar0 */
+#define ID_AA64ISAR0_TS_SHIFT		52
 #define ID_AA64ISAR0_FHM_SHIFT		48
 #define ID_AA64ISAR0_DP_SHIFT		44
 #define ID_AA64ISAR0_SM4_SHIFT		40
@@ -511,6 +512,7 @@
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_CSV2_SHIFT		56
+#define ID_AA64PFR0_DIT_SHIFT		48
 #define ID_AA64PFR0_SVE_SHIFT		32
 #define ID_AA64PFR0_RAS_SHIFT		28
 #define ID_AA64PFR0_GIC_SHIFT		24
@@ -568,6 +570,7 @@
 #define ID_AA64MMFR1_VMIDBITS_16	2
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_AT_SHIFT		32
 #define ID_AA64MMFR2_LVA_SHIFT		16
 #define ID_AA64MMFR2_IESB_SHIFT		12
 #define ID_AA64MMFR2_LSM_SHIFT		8
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 07aa8e3c5630..28893a0b141d 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -45,17 +45,6 @@ extern void __show_regs(struct pt_regs *);
 
 extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
-#define show_unhandled_signals_ratelimited()				\
-({									\
-	static DEFINE_RATELIMIT_STATE(_rs,				\
-				      DEFAULT_RATELIMIT_INTERVAL,	\
-				      DEFAULT_RATELIMIT_BURST);		\
-	bool __show_ratelimited = false;				\
-	if (show_unhandled_signals && __ratelimit(&_rs))		\
-		__show_ratelimited = true;				\
-	__show_ratelimited;						\
-})
-
 int handle_guest_sea(phys_addr_t addr, unsigned int esr);
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 9e82dd79c7db..dfc61d73f740 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -60,6 +60,15 @@
 		__tlbi(op, (arg) | USER_ASID_FLAG);				\
 } while (0)
 
+/* This macro creates a properly formatted VA operand for the TLBI */
+#define __TLBI_VADDR(addr, asid)				\
+	({							\
+		unsigned long __ta = (addr) >> 12;		\
+		__ta &= GENMASK_ULL(43, 0);			\
+		__ta |= (unsigned long)(asid) << 48;		\
+		__ta;						\
+	})
+
 /*
  *	TLB Management
  *	==============
@@ -117,7 +126,7 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned long asid = ASID(mm) << 48;
+	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
 
 	dsb(ishst);
 	__tlbi(aside1is, asid);
@@ -128,7 +137,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
+	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
 
 	dsb(ishst);
 	__tlbi(vale1is, addr);
@@ -146,7 +155,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     bool last_level)
 {
-	unsigned long asid = ASID(vma->vm_mm) << 48;
+	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
 
 	if ((end - start) > MAX_TLB_RANGE) {
@@ -154,8 +163,8 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 		return;
 	}
 
-	start = asid | (start >> 12);
-	end = asid | (end >> 12);
+	start = __TLBI_VADDR(start, asid);
+	end = __TLBI_VADDR(end, asid);
 
 	dsb(ishst);
 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
@@ -185,8 +194,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 		return;
 	}
 
-	start >>= 12;
-	end >>= 12;
+	start = __TLBI_VADDR(start, 0);
+	end = __TLBI_VADDR(end, 0);
 
 	dsb(ishst);
 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
@@ -202,7 +211,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 static inline void __flush_tlb_pgtable(struct mm_struct *mm,
 				       unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
+	unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm));
 
 	__tlbi(vae1is, addr);
 	__tlbi_user(vae1is, addr);
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 178e338d2889..c320f3bf6c57 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -35,10 +35,10 @@ struct undef_hook {
 
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
-void force_signal_inject(int signal, int code, struct pt_regs *regs,
-			 unsigned long address);
-
-void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr);
+void force_signal_inject(int signal, int code, unsigned long address);
+void arm64_notify_segfault(unsigned long addr);
+void arm64_force_sig_info(struct siginfo *info, const char *str,
+			  struct task_struct *tsk);
 
 /*
  * Move regs->pc to next instruction and do necessary setup before it
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index c5f89442785c..9d1e24e030b3 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -102,12 +102,6 @@ static inline bool has_vhe(void)
 	return false;
 }
 
-#ifdef CONFIG_ARM64_VHE
-extern void verify_cpu_run_el(void);
-#else
-static inline void verify_cpu_run_el(void) {}
-#endif
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index f018c3deea3b..17c65c8f33cb 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -44,5 +44,9 @@
 #define HWCAP_SHA512		(1 << 21)
 #define HWCAP_SVE		(1 << 22)
 #define HWCAP_ASIMDFHM		(1 << 23)
+#define HWCAP_DIT		(1 << 24)
+#define HWCAP_USCAT		(1 << 25)
+#define HWCAP_ILRCPC		(1 << 26)
+#define HWCAP_FLAGM		(1 << 27)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/siginfo.h b/arch/arm64/include/uapi/asm/siginfo.h
index 9b4d91277742..574d12f86039 100644
--- a/arch/arm64/include/uapi/asm/siginfo.h
+++ b/arch/arm64/include/uapi/asm/siginfo.h
@@ -21,25 +21,4 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGBUS si_codes
- */
-#ifdef __KERNEL__
-#define BUS_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
 #endif
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 68450e954d47..6e47fc3ab549 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -429,7 +429,7 @@ ret:
 
 fault:
 	pr_debug("SWP{B} emulation: access caused memory abort!\n");
-	arm64_notify_segfault(regs, address);
+	arm64_notify_segfault(address);
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b5a28336c077..2df792771053 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -24,10 +24,28 @@
 static bool __maybe_unused
 is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
 {
+	const struct arm64_midr_revidr *fix;
+	u32 midr = read_cpuid_id(), revidr;
+
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+	if (!is_midr_in_range(midr, &entry->midr_range))
+		return false;
+
+	midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
+	revidr = read_cpuid(REVIDR_EL1);
+	for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++)
+		if (midr == fix->midr_rv && (revidr & fix->revidr_mask))
+			return false;
+
+	return true;
+}
+
+static bool __maybe_unused
+is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry,
+			    int scope)
+{
 	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-	return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
-				       entry->midr_range_min,
-				       entry->midr_range_max);
+	return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list);
 }
 
 static bool __maybe_unused
@@ -41,7 +59,7 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
 	model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) |
 		 MIDR_ARCHITECTURE_MASK;
 
-	return model == entry->midr_model;
+	return model == entry->midr_range.model;
 }
 
 static bool
@@ -53,11 +71,11 @@ has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
 		(arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
 }
 
-static int cpu_enable_trap_ctr_access(void *__unused)
+static void
+cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
 {
 	/* Clear SCTLR_EL1.UCT */
 	config_sctlr_el1(SCTLR_EL1_UCT, 0);
-	return 0;
 }
 
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
@@ -161,25 +179,25 @@ static void call_hvc_arch_workaround_1(void)
 	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
-static int enable_smccc_arch_workaround_1(void *data)
+static void
+enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
 {
-	const struct arm64_cpu_capabilities *entry = data;
 	bp_hardening_cb_t cb;
 	void *smccc_start, *smccc_end;
 	struct arm_smccc_res res;
 
 	if (!entry->matches(entry, SCOPE_LOCAL_CPU))
-		return 0;
+		return;
 
 	if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
-		return 0;
+		return;
 
 	switch (psci_ops.conduit) {
 	case PSCI_CONDUIT_HVC:
 		arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 		if ((int)res.a0 < 0)
-			return 0;
+			return;
 		cb = call_hvc_arch_workaround_1;
 		smccc_start = __smccc_workaround_1_hvc_start;
 		smccc_end = __smccc_workaround_1_hvc_end;
@@ -189,19 +207,19 @@ static int enable_smccc_arch_workaround_1(void *data)
 		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
 				  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 		if ((int)res.a0 < 0)
-			return 0;
+			return;
 		cb = call_smc_arch_workaround_1;
 		smccc_start = __smccc_workaround_1_smc_start;
 		smccc_end = __smccc_workaround_1_smc_end;
 		break;
 
 	default:
-		return 0;
+		return;
 	}
 
 	install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
 
-	return 0;
+	return;
 }
 
 static void qcom_link_stack_sanitization(void)
@@ -216,31 +234,119 @@ static void qcom_link_stack_sanitization(void)
 		     : "=&r" (tmp));
 }
 
-static int qcom_enable_link_stack_sanitization(void *data)
+static void
+qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry)
 {
-	const struct arm64_cpu_capabilities *entry = data;
-
 	install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
 				__qcom_hyp_sanitize_link_stack_start,
 				__qcom_hyp_sanitize_link_stack_end);
-
-	return 0;
 }
 #endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
 
-#define MIDR_RANGE(model, min, max) \
-	.def_scope = SCOPE_LOCAL_CPU, \
-	.matches = is_affected_midr_range, \
-	.midr_model = model, \
-	.midr_range_min = min, \
-	.midr_range_max = max
+#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
+	.matches = is_affected_midr_range,			\
+	.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
+
+#define CAP_MIDR_ALL_VERSIONS(model)					\
+	.matches = is_affected_midr_range,				\
+	.midr_range = MIDR_ALL_VERSIONS(model)
+
+#define MIDR_FIXED(rev, revidr_mask) \
+	.fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}}
+
+#define ERRATA_MIDR_RANGE(model, v_min, r_min, v_max, r_max)		\
+	.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,				\
+	CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)
+
+#define CAP_MIDR_RANGE_LIST(list)				\
+	.matches = is_affected_midr_range_list,			\
+	.midr_range_list = list
+
+/* Errata affecting a range of revisions of  given model variant */
+#define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max)	 \
+	ERRATA_MIDR_RANGE(m, var, r_min, var, r_max)
+
+/* Errata affecting a single variant/revision of a model */
+#define ERRATA_MIDR_REV(model, var, rev)	\
+	ERRATA_MIDR_RANGE(model, var, rev, var, rev)
+
+/* Errata affecting all variants/revisions of a given a model */
+#define ERRATA_MIDR_ALL_VERSIONS(model)				\
+	.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,			\
+	CAP_MIDR_ALL_VERSIONS(model)
+
+/* Errata affecting a list of midr ranges, with same work around */
+#define ERRATA_MIDR_RANGE_LIST(midr_list)			\
+	.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,			\
+	CAP_MIDR_RANGE_LIST(midr_list)
+
+/*
+ * Generic helper for handling capabilties with multiple (match,enable) pairs
+ * of call backs, sharing the same capability bit.
+ * Iterate over each entry to see if at least one matches.
+ */
+static bool __maybe_unused
+multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	const struct arm64_cpu_capabilities *caps;
+
+	for (caps = entry->match_list; caps->matches; caps++)
+		if (caps->matches(caps, scope))
+			return true;
+
+	return false;
+}
+
+/*
+ * Take appropriate action for all matching entries in the shared capability
+ * entry.
+ */
+static void __maybe_unused
+multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry)
+{
+	const struct arm64_cpu_capabilities *caps;
 
-#define MIDR_ALL_VERSIONS(model) \
-	.def_scope = SCOPE_LOCAL_CPU, \
-	.matches = is_affected_midr_range, \
-	.midr_model = model, \
-	.midr_range_min = 0, \
-	.midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
+	for (caps = entry->match_list; caps->matches; caps++)
+		if (caps->matches(caps, SCOPE_LOCAL_CPU) &&
+		    caps->cpu_enable)
+			caps->cpu_enable(caps);
+}
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+
+/*
+ * List of CPUs where we need to issue a psci call to
+ * harden the branch predictor.
+ */
+static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+	MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+	MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+	{},
+};
+
+static const struct midr_range qcom_bp_harden_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
+	MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+	{},
+};
+
+static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = {
+	{
+		CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
+		.cpu_enable = enable_smccc_arch_workaround_1,
+	},
+	{
+		CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
+		.cpu_enable = qcom_enable_link_stack_sanitization,
+	},
+	{},
+};
+
+#endif
 
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
@@ -250,8 +356,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A53 r0p[012] */
 		.desc = "ARM errata 826319, 827319, 824069",
 		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
-		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02),
-		.enable = cpu_enable_cache_maint_trap,
+		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
+		.cpu_enable = cpu_enable_cache_maint_trap,
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_819472
@@ -259,8 +365,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A53 r0p[01] */
 		.desc = "ARM errata 819472",
 		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
-		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01),
-		.enable = cpu_enable_cache_maint_trap,
+		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1),
+		.cpu_enable = cpu_enable_cache_maint_trap,
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_832075
@@ -268,9 +374,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A57 r0p0 - r1p2 */
 		.desc = "ARM erratum 832075",
 		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
-		MIDR_RANGE(MIDR_CORTEX_A57,
-			   MIDR_CPU_VAR_REV(0, 0),
-			   MIDR_CPU_VAR_REV(1, 2)),
+		ERRATA_MIDR_RANGE(MIDR_CORTEX_A57,
+				  0, 0,
+				  1, 2),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_834220
@@ -278,9 +384,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A57 r0p0 - r1p2 */
 		.desc = "ARM erratum 834220",
 		.capability = ARM64_WORKAROUND_834220,
-		MIDR_RANGE(MIDR_CORTEX_A57,
-			   MIDR_CPU_VAR_REV(0, 0),
-			   MIDR_CPU_VAR_REV(1, 2)),
+		ERRATA_MIDR_RANGE(MIDR_CORTEX_A57,
+				  0, 0,
+				  1, 2),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_843419
+	{
+	/* Cortex-A53 r0p[01234] */
+		.desc = "ARM erratum 843419",
+		.capability = ARM64_WORKAROUND_843419,
+		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+		MIDR_FIXED(0x4, BIT(8)),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_845719
@@ -288,7 +403,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A53 r0p[01234] */
 		.desc = "ARM erratum 845719",
 		.capability = ARM64_WORKAROUND_845719,
-		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
+		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_23154
@@ -296,7 +411,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cavium ThunderX, pass 1.x */
 		.desc = "Cavium erratum 23154",
 		.capability = ARM64_WORKAROUND_CAVIUM_23154,
-		MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01),
+		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -304,15 +419,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
 		.desc = "Cavium erratum 27456",
 		.capability = ARM64_WORKAROUND_CAVIUM_27456,
-		MIDR_RANGE(MIDR_THUNDERX,
-			   MIDR_CPU_VAR_REV(0, 0),
-			   MIDR_CPU_VAR_REV(1, 1)),
+		ERRATA_MIDR_RANGE(MIDR_THUNDERX,
+				  0, 0,
+				  1, 1),
 	},
 	{
 	/* Cavium ThunderX, T81 pass 1.0 */
 		.desc = "Cavium erratum 27456",
 		.capability = ARM64_WORKAROUND_CAVIUM_27456,
-		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00),
+		ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_30115
@@ -320,42 +435,41 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cavium ThunderX, T88 pass 1.x - 2.2 */
 		.desc = "Cavium erratum 30115",
 		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		MIDR_RANGE(MIDR_THUNDERX, 0x00,
-			   (1 << MIDR_VARIANT_SHIFT) | 2),
+		ERRATA_MIDR_RANGE(MIDR_THUNDERX,
+				      0, 0,
+				      1, 2),
 	},
 	{
 	/* Cavium ThunderX, T81 pass 1.0 - 1.2 */
 		.desc = "Cavium erratum 30115",
 		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02),
+		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2),
 	},
 	{
 	/* Cavium ThunderX, T83 pass 1.0 */
 		.desc = "Cavium erratum 30115",
 		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00),
+		ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0),
 	},
 #endif
 	{
 		.desc = "Mismatched cache line size",
 		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
 		.matches = has_mismatched_cache_line_size,
-		.def_scope = SCOPE_LOCAL_CPU,
-		.enable = cpu_enable_trap_ctr_access,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.cpu_enable = cpu_enable_trap_ctr_access,
 	},
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
 	{
 		.desc = "Qualcomm Technologies Falkor erratum 1003",
 		.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
-		MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
-			   MIDR_CPU_VAR_REV(0, 0),
-			   MIDR_CPU_VAR_REV(0, 0)),
+		ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
 	},
 	{
 		.desc = "Qualcomm Technologies Kryo erratum 1003",
 		.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
-		.def_scope = SCOPE_LOCAL_CPU,
-		.midr_model = MIDR_QCOM_KRYO,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.midr_range.model = MIDR_QCOM_KRYO,
 		.matches = is_kryo_midr,
 	},
 #endif
@@ -363,9 +477,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.desc = "Qualcomm Technologies Falkor erratum 1009",
 		.capability = ARM64_WORKAROUND_REPEAT_TLBI,
-		MIDR_RANGE(MIDR_QCOM_FALKOR_V1,
-			   MIDR_CPU_VAR_REV(0, 0),
-			   MIDR_CPU_VAR_REV(0, 0)),
+		ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_858921
@@ -373,92 +485,22 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	/* Cortex-A73 all versions */
 		.desc = "ARM erratum 858921",
 		.capability = ARM64_WORKAROUND_858921,
-		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+		ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
 	},
 #endif
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 	{
 		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
-		.enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
-		.enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
-		.enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
-		.enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
-		.enable = qcom_enable_link_stack_sanitization,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = multi_entry_cap_matches,
+		.cpu_enable = multi_entry_cap_cpu_enable,
+		.match_list = arm64_bp_harden_list,
 	},
 	{
 		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
-		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
-		.enable = qcom_enable_link_stack_sanitization,
-	},
-	{
-		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
-		MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
-		.enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
-		MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
-		.enable = enable_smccc_arch_workaround_1,
+		ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
 	},
 #endif
 	{
 	}
 };
-
-/*
- * The CPU Errata work arounds are detected and applied at boot time
- * and the related information is freed soon after. If the new CPU requires
- * an errata not detected at boot, fail this CPU.
- */
-void verify_local_cpu_errata_workarounds(void)
-{
-	const struct arm64_cpu_capabilities *caps = arm64_errata;
-
-	for (; caps->matches; caps++) {
-		if (cpus_have_cap(caps->capability)) {
-			if (caps->enable)
-				caps->enable((void *)caps);
-		} else if (caps->matches(caps, SCOPE_LOCAL_CPU)) {
-			pr_crit("CPU%d: Requires work around for %s, not detected"
-					" at boot time\n",
-				smp_processor_id(),
-				caps->desc ? : "an erratum");
-			cpu_die_early();
-		}
-	}
-}
-
-void update_cpu_errata_workarounds(void)
-{
-	update_cpu_capabilities(arm64_errata, "enabling workaround for");
-}
-
-void __init enable_errata_workarounds(void)
-{
-	enable_cpu_capabilities(arm64_errata);
-}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2985a067fc13..96b15d7b10a8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
  * sync with the documentation of the CPU feature register ABI.
  */
 static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
@@ -148,6 +149,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
 				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
@@ -190,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
@@ -199,12 +202,12 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_ctr[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1),		/* RES1 */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1),	/* DIC */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1),	/* IDC */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0),	/* ERG */
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1),
 	/*
 	 * Linux can handle differing I-cache policies. Userspace JITs will
 	 * make use of *minLine.
@@ -506,6 +509,9 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
 	reg->user_mask = user_mask;
 }
 
+extern const struct arm64_cpu_capabilities arm64_errata[];
+static void __init setup_boot_cpu_capabilities(void);
+
 void __init init_cpu_features(struct cpuinfo_arm64 *info)
 {
 	/* Before we start using the tables, make sure it is sorted */
@@ -548,6 +554,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
 		sve_init_vq_map();
 	}
+
+	/*
+	 * Detect and enable early CPU capabilities based on the boot CPU,
+	 * after we have initialised the CPU feature infrastructure.
+	 */
+	setup_boot_cpu_capabilities();
 }
 
 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -826,11 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
 		MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
 }
 
-static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
-{
-	return is_kernel_in_hyp_mode();
-}
-
 static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
 			   int __unused)
 {
@@ -852,14 +859,30 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 					ID_AA64PFR0_FP_SHIFT) < 0;
 }
 
+static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
+			  int __unused)
+{
+	return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_IDC_SHIFT);
+}
+
+static bool has_cache_dic(const struct arm64_cpu_capabilities *entry,
+			  int __unused)
+{
+	return read_sanitised_ftr_reg(SYS_CTR_EL0) & BIT(CTR_DIC_SHIFT);
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
 
 static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
-				int __unused)
+				int scope)
 {
+	/* List of CPUs that are not vulnerable and don't need KPTI */
+	static const struct midr_range kpti_safe_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+		MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+	};
 	char const *str = "command line option";
-	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 
 	/*
 	 * For reasons that aren't entirely clear, enabling KPTI on Cavium
@@ -883,18 +906,15 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
 		return true;
 
 	/* Don't force KPTI for CPUs that are not vulnerable */
-	switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) {
-	case MIDR_CAVIUM_THUNDERX2:
-	case MIDR_BRCM_VULCAN:
+	if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
 		return false;
-	}
 
 	/* Defer to CPU feature registers */
-	return !cpuid_feature_extract_unsigned_field(pfr0,
-						     ID_AA64PFR0_CSV3_SHIFT);
+	return !has_cpuid_feature(entry, scope);
 }
 
-static int kpti_install_ng_mappings(void *__unused)
+static void
+kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
 {
 	typedef void (kpti_remap_fn)(int, int, phys_addr_t);
 	extern kpti_remap_fn idmap_kpti_install_ng_mappings;
@@ -904,7 +924,7 @@ static int kpti_install_ng_mappings(void *__unused)
 	int cpu = smp_processor_id();
 
 	if (kpti_applied)
-		return 0;
+		return;
 
 	remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
 
@@ -915,7 +935,7 @@ static int kpti_install_ng_mappings(void *__unused)
 	if (!cpu)
 		kpti_applied = true;
 
-	return 0;
+	return;
 }
 
 static int __init parse_kpti(char *str)
@@ -932,7 +952,78 @@ static int __init parse_kpti(char *str)
 __setup("kpti=", parse_kpti);
 #endif	/* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
-static int cpu_copy_el2regs(void *__unused)
+#ifdef CONFIG_ARM64_HW_AFDBM
+static inline void __cpu_enable_hw_dbm(void)
+{
+	u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
+
+	write_sysreg(tcr, tcr_el1);
+	isb();
+}
+
+static bool cpu_has_broken_dbm(void)
+{
+	/* List of CPUs which have broken DBM support. */
+	static const struct midr_range cpus[] = {
+#ifdef CONFIG_ARM64_ERRATUM_1024718
+		MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0),  // A55 r0p0 -r1p0
+#endif
+		{},
+	};
+
+	return is_midr_in_range_list(read_cpuid_id(), cpus);
+}
+
+static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
+{
+	return has_cpuid_feature(cap, SCOPE_LOCAL_CPU) &&
+	       !cpu_has_broken_dbm();
+}
+
+static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
+{
+	if (cpu_can_use_dbm(cap))
+		__cpu_enable_hw_dbm();
+}
+
+static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
+		       int __unused)
+{
+	static bool detected = false;
+	/*
+	 * DBM is a non-conflicting feature. i.e, the kernel can safely
+	 * run a mix of CPUs with and without the feature. So, we
+	 * unconditionally enable the capability to allow any late CPU
+	 * to use the feature. We only enable the control bits on the
+	 * CPU, if it actually supports.
+	 *
+	 * We have to make sure we print the "feature" detection only
+	 * when at least one CPU actually uses it. So check if this CPU
+	 * can actually use it and print the message exactly once.
+	 *
+	 * This is safe as all CPUs (including secondary CPUs - due to the
+	 * LOCAL_CPU scope - and the hotplugged CPUs - via verification)
+	 * goes through the "matches" check exactly once. Also if a CPU
+	 * matches the criteria, it is guaranteed that the CPU will turn
+	 * the DBM on, as the capability is unconditionally enabled.
+	 */
+	if (!detected && cpu_can_use_dbm(cap)) {
+		detected = true;
+		pr_info("detected: Hardware dirty bit management\n");
+	}
+
+	return true;
+}
+
+#endif
+
+#ifdef CONFIG_ARM64_VHE
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	return is_kernel_in_hyp_mode();
+}
+
+static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 {
 	/*
 	 * Copy register values that aren't redirected by hardware.
@@ -944,15 +1035,14 @@ static int cpu_copy_el2regs(void *__unused)
 	 */
 	if (!alternatives_applied)
 		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
-
-	return 0;
 }
+#endif
 
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
 		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_useable_gicv3_cpuif,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.field_pos = ID_AA64PFR0_GIC_SHIFT,
@@ -963,20 +1053,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "Privileged Access Never",
 		.capability = ARM64_HAS_PAN,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
-		.enable = cpu_enable_pan,
+		.cpu_enable = cpu_enable_pan,
 	},
 #endif /* CONFIG_ARM64_PAN */
 #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
 	{
 		.desc = "LSE atomic instructions",
 		.capability = ARM64_HAS_LSE_ATOMICS,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR0_EL1,
 		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
@@ -987,14 +1077,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "Software prefetching using PRFM",
 		.capability = ARM64_HAS_NO_HW_PREFETCH,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
 		.matches = has_no_hw_prefetch,
 	},
 #ifdef CONFIG_ARM64_UAO
 	{
 		.desc = "User Access Override",
 		.capability = ARM64_HAS_UAO,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
@@ -1008,21 +1098,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 #ifdef CONFIG_ARM64_PAN
 	{
 		.capability = ARM64_ALT_PAN_NOT_UAO,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = cpufeature_pan_not_uao,
 	},
 #endif /* CONFIG_ARM64_PAN */
+#ifdef CONFIG_ARM64_VHE
 	{
 		.desc = "Virtualization Host Extensions",
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
 		.matches = runs_at_el2,
-		.enable = cpu_copy_el2regs,
+		.cpu_enable = cpu_copy_el2regs,
 	},
+#endif	/* CONFIG_ARM64_VHE */
 	{
 		.desc = "32-bit EL0 Support",
 		.capability = ARM64_HAS_32BIT_EL0,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
@@ -1032,22 +1124,30 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "Reduced HYP mapping offset",
 		.capability = ARM64_HYP_OFFSET_LOW,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = hyp_offset_low,
 	},
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	{
 		.desc = "Kernel page table isolation (KPTI)",
 		.capability = ARM64_UNMAP_KERNEL_AT_EL0,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE,
+		/*
+		 * The ID feature fields below are used to indicate that
+		 * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for
+		 * more details.
+		 */
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.field_pos = ID_AA64PFR0_CSV3_SHIFT,
+		.min_field_value = 1,
 		.matches = unmap_kernel_at_el0,
-		.enable = kpti_install_ng_mappings,
+		.cpu_enable = kpti_install_ng_mappings,
 	},
 #endif
 	{
 		/* FP/SIMD is not implemented */
 		.capability = ARM64_HAS_NO_FPSIMD,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.min_field_value = 0,
 		.matches = has_no_fpsimd,
 	},
@@ -1055,7 +1155,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "Data cache clean to Point of Persistence",
 		.capability = ARM64_HAS_DCPOP,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64ISAR1_EL1,
 		.field_pos = ID_AA64ISAR1_DPB_SHIFT,
@@ -1065,42 +1165,74 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 #ifdef CONFIG_ARM64_SVE
 	{
 		.desc = "Scalable Vector Extension",
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.capability = ARM64_SVE,
-		.def_scope = SCOPE_SYSTEM,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_SVE_SHIFT,
 		.min_field_value = ID_AA64PFR0_SVE,
 		.matches = has_cpuid_feature,
-		.enable = sve_kernel_enable,
+		.cpu_enable = sve_kernel_enable,
 	},
 #endif /* CONFIG_ARM64_SVE */
 #ifdef CONFIG_ARM64_RAS_EXTN
 	{
 		.desc = "RAS Extension Support",
 		.capability = ARM64_HAS_RAS_EXTN,
-		.def_scope = SCOPE_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_RAS_SHIFT,
 		.min_field_value = ID_AA64PFR0_RAS_V1,
-		.enable = cpu_clear_disr,
+		.cpu_enable = cpu_clear_disr,
 	},
 #endif /* CONFIG_ARM64_RAS_EXTN */
+	{
+		.desc = "Data cache clean to the PoU not required for I/D coherence",
+		.capability = ARM64_HAS_CACHE_IDC,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cache_idc,
+	},
+	{
+		.desc = "Instruction cache invalidation not required for I/D coherence",
+		.capability = ARM64_HAS_CACHE_DIC,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cache_dic,
+	},
+#ifdef CONFIG_ARM64_HW_AFDBM
+	{
+		/*
+		 * Since we turn this on always, we don't want the user to
+		 * think that the feature is available when it may not be.
+		 * So hide the description.
+		 *
+		 * .desc = "Hardware pagetable Dirty Bit Management",
+		 *
+		 */
+		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+		.capability = ARM64_HW_DBM,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.min_field_value = 2,
+		.matches = has_hw_dbm,
+		.cpu_enable = cpu_enable_hw_dbm,
+	},
+#endif
 	{},
 };
 
-#define HWCAP_CAP(reg, field, s, min_value, type, cap)	\
+#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap)	\
 	{							\
 		.desc = #cap,					\
-		.def_scope = SCOPE_SYSTEM,			\
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,		\
 		.matches = has_cpuid_feature,			\
 		.sys_reg = reg,					\
 		.field_pos = field,				\
 		.sign = s,					\
 		.min_field_value = min_value,			\
-		.hwcap_type = type,				\
+		.hwcap_type = cap_type,				\
 		.hwcap = cap,					\
 	}
 
@@ -1118,14 +1250,18 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
+	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
 #endif
@@ -1193,7 +1329,7 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
 	/* We support emulation of accesses to CPU ID feature registers */
 	elf_hwcap |= HWCAP_CPUID;
 	for (; hwcaps->matches; hwcaps++)
-		if (hwcaps->matches(hwcaps, hwcaps->def_scope))
+		if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps)))
 			cap_set_elf_hwcap(hwcaps);
 }
 
@@ -1210,17 +1346,19 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
 		return false;
 
 	for (caps = cap_array; caps->matches; caps++)
-		if (caps->capability == cap &&
-		    caps->matches(caps, SCOPE_LOCAL_CPU))
-			return true;
+		if (caps->capability == cap)
+			return caps->matches(caps, SCOPE_LOCAL_CPU);
+
 	return false;
 }
 
-void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
-			    const char *info)
+static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+				      u16 scope_mask, const char *info)
 {
+	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
 	for (; caps->matches; caps++) {
-		if (!caps->matches(caps, caps->def_scope))
+		if (!(caps->type & scope_mask) ||
+		    !caps->matches(caps, cpucap_default_scope(caps)))
 			continue;
 
 		if (!cpus_have_cap(caps->capability) && caps->desc)
@@ -1229,41 +1367,145 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 	}
 }
 
+static void update_cpu_capabilities(u16 scope_mask)
+{
+	__update_cpu_capabilities(arm64_features, scope_mask, "detected:");
+	__update_cpu_capabilities(arm64_errata, scope_mask,
+				  "enabling workaround for");
+}
+
+static int __enable_cpu_capability(void *arg)
+{
+	const struct arm64_cpu_capabilities *cap = arg;
+
+	cap->cpu_enable(cap);
+	return 0;
+}
+
 /*
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+static void __init
+__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			  u16 scope_mask)
 {
+	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
 	for (; caps->matches; caps++) {
 		unsigned int num = caps->capability;
 
-		if (!cpus_have_cap(num))
+		if (!(caps->type & scope_mask) || !cpus_have_cap(num))
 			continue;
 
 		/* Ensure cpus_have_const_cap(num) works */
 		static_branch_enable(&cpu_hwcap_keys[num]);
 
-		if (caps->enable) {
+		if (caps->cpu_enable) {
 			/*
-			 * Use stop_machine() as it schedules the work allowing
-			 * us to modify PSTATE, instead of on_each_cpu() which
-			 * uses an IPI, giving us a PSTATE that disappears when
-			 * we return.
+			 * Capabilities with SCOPE_BOOT_CPU scope are finalised
+			 * before any secondary CPU boots. Thus, each secondary
+			 * will enable the capability as appropriate via
+			 * check_local_cpu_capabilities(). The only exception is
+			 * the boot CPU, for which the capability must be
+			 * enabled here. This approach avoids costly
+			 * stop_machine() calls for this case.
+			 *
+			 * Otherwise, use stop_machine() as it schedules the
+			 * work allowing us to modify PSTATE, instead of
+			 * on_each_cpu() which uses an IPI, giving us a PSTATE
+			 * that disappears when we return.
 			 */
-			stop_machine(caps->enable, (void *)caps, cpu_online_mask);
+			if (scope_mask & SCOPE_BOOT_CPU)
+				caps->cpu_enable(caps);
+			else
+				stop_machine(__enable_cpu_capability,
+					     (void *)caps, cpu_online_mask);
 		}
 	}
 }
 
+static void __init enable_cpu_capabilities(u16 scope_mask)
+{
+	__enable_cpu_capabilities(arm64_features, scope_mask);
+	__enable_cpu_capabilities(arm64_errata, scope_mask);
+}
+
+/*
+ * Run through the list of capabilities to check for conflicts.
+ * If the system has already detected a capability, take necessary
+ * action on this CPU.
+ *
+ * Returns "false" on conflicts.
+ */
+static bool
+__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps,
+			u16 scope_mask)
+{
+	bool cpu_has_cap, system_has_cap;
+
+	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
+
+	for (; caps->matches; caps++) {
+		if (!(caps->type & scope_mask))
+			continue;
+
+		cpu_has_cap = caps->matches(caps, SCOPE_LOCAL_CPU);
+		system_has_cap = cpus_have_cap(caps->capability);
+
+		if (system_has_cap) {
+			/*
+			 * Check if the new CPU misses an advertised feature,
+			 * which is not safe to miss.
+			 */
+			if (!cpu_has_cap && !cpucap_late_cpu_optional(caps))
+				break;
+			/*
+			 * We have to issue cpu_enable() irrespective of
+			 * whether the CPU has it or not, as it is enabeld
+			 * system wide. It is upto the call back to take
+			 * appropriate action on this CPU.
+			 */
+			if (caps->cpu_enable)
+				caps->cpu_enable(caps);
+		} else {
+			/*
+			 * Check if the CPU has this capability if it isn't
+			 * safe to have when the system doesn't.
+			 */
+			if (cpu_has_cap && !cpucap_late_cpu_permitted(caps))
+				break;
+		}
+	}
+
+	if (caps->matches) {
+		pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n",
+			smp_processor_id(), caps->capability,
+			caps->desc, system_has_cap, cpu_has_cap);
+		return false;
+	}
+
+	return true;
+}
+
+static bool verify_local_cpu_caps(u16 scope_mask)
+{
+	return __verify_local_cpu_caps(arm64_errata, scope_mask) &&
+	       __verify_local_cpu_caps(arm64_features, scope_mask);
+}
+
 /*
  * Check for CPU features that are used in early boot
  * based on the Boot CPU value.
  */
 static void check_early_cpu_features(void)
 {
-	verify_cpu_run_el();
 	verify_cpu_asid_bits();
+	/*
+	 * Early features are used by the kernel already. If there
+	 * is a conflict, we cannot proceed further.
+	 */
+	if (!verify_local_cpu_caps(SCOPE_BOOT_CPU))
+		cpu_panic_kernel();
 }
 
 static void
@@ -1278,27 +1520,6 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
 		}
 }
 
-static void
-verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list)
-{
-	const struct arm64_cpu_capabilities *caps = caps_list;
-	for (; caps->matches; caps++) {
-		if (!cpus_have_cap(caps->capability))
-			continue;
-		/*
-		 * If the new CPU misses an advertised feature, we cannot proceed
-		 * further, park the cpu.
-		 */
-		if (!__this_cpu_has_cap(caps_list, caps->capability)) {
-			pr_crit("CPU%d: missing feature: %s\n",
-					smp_processor_id(), caps->desc);
-			cpu_die_early();
-		}
-		if (caps->enable)
-			caps->enable((void *)caps);
-	}
-}
-
 static void verify_sve_features(void)
 {
 	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
@@ -1316,6 +1537,7 @@ static void verify_sve_features(void)
 	/* Add checks on other ZCR bits here if necessary */
 }
 
+
 /*
  * Run through the enabled system capabilities and enable() it on this CPU.
  * The capabilities were decided based on the available CPUs at the boot time.
@@ -1326,8 +1548,14 @@ static void verify_sve_features(void)
  */
 static void verify_local_cpu_capabilities(void)
 {
-	verify_local_cpu_errata_workarounds();
-	verify_local_cpu_features(arm64_features);
+	/*
+	 * The capabilities with SCOPE_BOOT_CPU are checked from
+	 * check_early_cpu_features(), as they need to be verified
+	 * on all secondary CPUs.
+	 */
+	if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU))
+		cpu_die_early();
+
 	verify_local_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())
@@ -1335,9 +1563,6 @@ static void verify_local_cpu_capabilities(void)
 
 	if (system_supports_sve())
 		verify_sve_features();
-
-	if (system_uses_ttbr0_pan())
-		pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
 }
 
 void check_local_cpu_capabilities(void)
@@ -1350,20 +1575,22 @@ void check_local_cpu_capabilities(void)
 
 	/*
 	 * If we haven't finalised the system capabilities, this CPU gets
-	 * a chance to update the errata work arounds.
+	 * a chance to update the errata work arounds and local features.
 	 * Otherwise, this CPU should verify that it has all the system
 	 * advertised capabilities.
 	 */
 	if (!sys_caps_initialised)
-		update_cpu_errata_workarounds();
+		update_cpu_capabilities(SCOPE_LOCAL_CPU);
 	else
 		verify_local_cpu_capabilities();
 }
 
-static void __init setup_feature_capabilities(void)
+static void __init setup_boot_cpu_capabilities(void)
 {
-	update_cpu_capabilities(arm64_features, "detected feature:");
-	enable_cpu_capabilities(arm64_features);
+	/* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
+	update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
+	/* Enable the SCOPE_BOOT_CPU capabilities alone right away */
+	enable_cpu_capabilities(SCOPE_BOOT_CPU);
 }
 
 DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
@@ -1382,20 +1609,33 @@ bool this_cpu_has_cap(unsigned int cap)
 		__this_cpu_has_cap(arm64_errata, cap));
 }
 
+static void __init setup_system_capabilities(void)
+{
+	/*
+	 * We have finalised the system-wide safe feature
+	 * registers, finalise the capabilities that depend
+	 * on it. Also enable all the available capabilities,
+	 * that are not enabled already.
+	 */
+	update_cpu_capabilities(SCOPE_SYSTEM);
+	enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+}
+
 void __init setup_cpu_features(void)
 {
 	u32 cwg;
 	int cls;
 
-	/* Set the CPU feature capabilies */
-	setup_feature_capabilities();
-	enable_errata_workarounds();
+	setup_system_capabilities();
 	mark_const_caps_ready();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
 	if (system_supports_32bit_el0())
 		setup_elf_hwcaps(compat_elf_hwcaps);
 
+	if (system_uses_ttbr0_pan())
+		pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+
 	sve_setup();
 
 	/* Advertise that we have computed the system capabilities */
@@ -1518,10 +1758,8 @@ static int __init enable_mrs_emulation(void)
 
 core_initcall(enable_mrs_emulation);
 
-int cpu_clear_disr(void *__unused)
+void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
 {
 	/* Firmware may have left a deferred SError in this register. */
 	write_sysreg_s(0, SYS_DISR_EL1);
-
-	return 0;
 }
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 7f94623df8a5..e9ab7b3ed317 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -77,6 +77,10 @@ static const char *const hwcap_str[] = {
 	"sha512",
 	"sve",
 	"asimdfhm",
+	"dit",
+	"uscat",
+	"ilrcpc",
+	"flagm",
 	NULL
 };
 
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 53781f5687c5..06ca574495af 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -33,6 +33,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
+#include <asm/traps.h>
 
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
@@ -223,7 +224,7 @@ static void send_user_sigtrap(int si_code)
 	if (interrupts_enabled(regs))
 		local_irq_enable();
 
-	force_sig_info(SIGTRAP, &info, current);
+	arm64_force_sig_info(&info, "User debug trap", current);
 }
 
 static int single_step_handler(unsigned long addr, unsigned int esr,
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e7226c4c7493..87a35364e750 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -39,7 +39,9 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 
+#include <asm/esr.h>
 #include <asm/fpsimd.h>
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
 #include <asm/simd.h>
 #include <asm/sigcontext.h>
@@ -64,7 +66,7 @@
  *     been loaded into its FPSIMD registers most recently, or whether it has
  *     been used to perform kernel mode NEON in the meantime.
  *
- * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to
  * the id of the current CPU every time the state is loaded onto a CPU. For (b),
  * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
  * address of the userland FPSIMD state of the task that was loaded onto the CPU
@@ -73,7 +75,7 @@
  * With this in place, we no longer have to restore the next FPSIMD state right
  * when switching between tasks. Instead, we can defer this check to userland
  * resume, at which time we verify whether the CPU's fpsimd_last_state and the
- * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * task's fpsimd_cpu are still mutually in sync. If this is the case, we
  * can omit the FPSIMD restore.
  *
  * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
@@ -90,14 +92,14 @@
  * flag with local_bh_disable() unless softirqs are already masked.
  *
  * For a certain task, the sequence may look something like this:
- * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ * - the task gets scheduled in; if both the task's fpsimd_cpu field
  *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
  *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
  *   cleared, otherwise it is set;
  *
  * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
  *   userland FPSIMD state is copied from memory to the registers, the task's
- *   fpsimd_state.cpu field is set to the id of the current CPU, the current
+ *   fpsimd_cpu field is set to the id of the current CPU, the current
  *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
  *   TIF_FOREIGN_FPSTATE flag is cleared;
  *
@@ -115,7 +117,7 @@
  *   whatever is in the FPSIMD registers is not saved to memory, but discarded.
  */
 struct fpsimd_last_state_struct {
-	struct fpsimd_state *st;
+	struct user_fpsimd_state *st;
 	bool sve_in_use;
 };
 
@@ -222,7 +224,7 @@ static void sve_user_enable(void)
  *    sets TIF_SVE.
  *
  *    When stored, FPSIMD registers V0-V31 are encoded in
- *    task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
  *    logically zero but not stored anywhere; P0-P15 and FFR are not
  *    stored and have unspecified values from userspace's point of
  *    view.  For hygiene purposes, the kernel zeroes them on next use,
@@ -231,9 +233,9 @@ static void sve_user_enable(void)
  *    task->thread.sve_state does not need to be non-NULL, valid or any
  *    particular size: it must not be dereferenced.
  *
- *  * FPSR and FPCR are always stored in task->fpsimd_state irrespctive of
- *    whether TIF_SVE is clear or set, since these are not vector length
- *    dependent.
+ *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
+ *    irrespective of whether TIF_SVE is clear or set, since these are
+ *    not vector length dependent.
  */
 
 /*
@@ -251,10 +253,10 @@ static void task_fpsimd_load(void)
 
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		sve_load_state(sve_pffr(current),
-			       &current->thread.fpsimd_state.fpsr,
+			       &current->thread.uw.fpsimd_state.fpsr,
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
 	else
-		fpsimd_load_state(&current->thread.fpsimd_state);
+		fpsimd_load_state(&current->thread.uw.fpsimd_state);
 
 	if (system_supports_sve()) {
 		/* Toggle SVE trapping for userspace if needed */
@@ -285,15 +287,14 @@ static void task_fpsimd_save(void)
 				 * re-enter user with corrupt state.
 				 * There's no way to recover, so kill it:
 				 */
-				force_signal_inject(
-					SIGKILL, 0, current_pt_regs(), 0);
+				force_signal_inject(SIGKILL, SI_KERNEL, 0);
 				return;
 			}
 
 			sve_save_state(sve_pffr(current),
-				       &current->thread.fpsimd_state.fpsr);
+				       &current->thread.uw.fpsimd_state.fpsr);
 		} else
-			fpsimd_save_state(&current->thread.fpsimd_state);
+			fpsimd_save_state(&current->thread.uw.fpsimd_state);
 	}
 }
 
@@ -404,20 +405,21 @@ static int __init sve_sysctl_init(void) { return 0; }
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 
 /*
- * Transfer the FPSIMD state in task->thread.fpsimd_state to
+ * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
  * task->thread.sve_state.
  *
  * Task can be a non-runnable task, or current.  In the latter case,
  * softirqs (and preemption) must be disabled.
  * task->thread.sve_state must point to at least sve_state_size(task)
  * bytes of allocated kernel memory.
- * task->thread.fpsimd_state must be up to date before calling this function.
+ * task->thread.uw.fpsimd_state must be up to date before calling this
+ * function.
  */
 static void fpsimd_to_sve(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
 
 	if (!system_supports_sve())
@@ -431,7 +433,7 @@ static void fpsimd_to_sve(struct task_struct *task)
 
 /*
  * Transfer the SVE state in task->thread.sve_state to
- * task->thread.fpsimd_state.
+ * task->thread.uw.fpsimd_state.
  *
  * Task can be a non-runnable task, or current.  In the latter case,
  * softirqs (and preemption) must be disabled.
@@ -443,7 +445,7 @@ static void sve_to_fpsimd(struct task_struct *task)
 {
 	unsigned int vq;
 	void const *sst = task->thread.sve_state;
-	struct fpsimd_state *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
 
 	if (!system_supports_sve())
@@ -510,7 +512,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
 }
 
 /*
- * Ensure that task->thread.fpsimd_state is up to date with respect to
+ * Ensure that task->thread.uw.fpsimd_state is up to date with respect to
  * the user task, irrespective of whether SVE is in use or not.
  *
  * This should only be called by ptrace.  task must be non-runnable.
@@ -525,21 +527,21 @@ void sve_sync_to_fpsimd(struct task_struct *task)
 
 /*
  * Ensure that task->thread.sve_state is up to date with respect to
- * the task->thread.fpsimd_state.
+ * the task->thread.uw.fpsimd_state.
  *
  * This should only be called by ptrace to merge new FPSIMD register
  * values into a task for which SVE is currently active.
  * task must be non-runnable.
  * task->thread.sve_state must point to at least sve_state_size(task)
  * bytes of allocated kernel memory.
- * task->thread.fpsimd_state must already have been initialised with
+ * task->thread.uw.fpsimd_state must already have been initialised with
  * the new FPSIMD register values to be merged in.
  */
 void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 {
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
-	struct fpsimd_state const *fst = &task->thread.fpsimd_state;
+	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
 
 	if (!test_tsk_thread_flag(task, TIF_SVE))
@@ -757,12 +759,10 @@ fail:
  * Enable SVE for EL1.
  * Intended for use by the cpufeatures code during CPU boot.
  */
-int sve_kernel_enable(void *__always_unused p)
+void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
 {
 	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
 	isb();
-
-	return 0;
 }
 
 void __init sve_setup(void)
@@ -831,7 +831,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 {
 	/* Even if we chose not to use SVE, the hardware could still trap: */
 	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
-		force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 		return;
 	}
 
@@ -867,18 +867,20 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
 {
 	siginfo_t info;
-	unsigned int si_code = FPE_FIXME;
-
-	if (esr & FPEXC_IOF)
-		si_code = FPE_FLTINV;
-	else if (esr & FPEXC_DZF)
-		si_code = FPE_FLTDIV;
-	else if (esr & FPEXC_OFF)
-		si_code = FPE_FLTOVF;
-	else if (esr & FPEXC_UFF)
-		si_code = FPE_FLTUND;
-	else if (esr & FPEXC_IXF)
-		si_code = FPE_FLTRES;
+	unsigned int si_code = FPE_FLTUNK;
+
+	if (esr & ESR_ELx_FP_EXC_TFV) {
+		if (esr & FPEXC_IOF)
+			si_code = FPE_FLTINV;
+		else if (esr & FPEXC_DZF)
+			si_code = FPE_FLTDIV;
+		else if (esr & FPEXC_OFF)
+			si_code = FPE_FLTOVF;
+		else if (esr & FPEXC_UFF)
+			si_code = FPE_FLTUND;
+		else if (esr & FPEXC_IXF)
+			si_code = FPE_FLTRES;
+	}
 
 	memset(&info, 0, sizeof(info));
 	info.si_signo = SIGFPE;
@@ -908,10 +910,9 @@ void fpsimd_thread_switch(struct task_struct *next)
 		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
 		 * upon the next return to userland.
 		 */
-		struct fpsimd_state *st = &next->thread.fpsimd_state;
-
-		if (__this_cpu_read(fpsimd_last_state.st) == st
-		    && st->cpu == smp_processor_id())
+		if (__this_cpu_read(fpsimd_last_state.st) ==
+			&next->thread.uw.fpsimd_state
+		    && next->thread.fpsimd_cpu == smp_processor_id())
 			clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
 		else
 			set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
@@ -927,7 +928,8 @@ void fpsimd_flush_thread(void)
 
 	local_bh_disable();
 
-	memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
+	memset(&current->thread.uw.fpsimd_state, 0,
+	       sizeof(current->thread.uw.fpsimd_state));
 	fpsimd_flush_task_state(current);
 
 	if (system_supports_sve()) {
@@ -986,7 +988,7 @@ void fpsimd_preserve_current_state(void)
 
 /*
  * Like fpsimd_preserve_current_state(), but ensure that
- * current->thread.fpsimd_state is updated so that it can be copied to
+ * current->thread.uw.fpsimd_state is updated so that it can be copied to
  * the signal frame.
  */
 void fpsimd_signal_preserve_current_state(void)
@@ -1004,11 +1006,10 @@ static void fpsimd_bind_to_cpu(void)
 {
 	struct fpsimd_last_state_struct *last =
 		this_cpu_ptr(&fpsimd_last_state);
-	struct fpsimd_state *st = &current->thread.fpsimd_state;
 
-	last->st = st;
+	last->st = &current->thread.uw.fpsimd_state;
 	last->sve_in_use = test_thread_flag(TIF_SVE);
-	st->cpu = smp_processor_id();
+	current->thread.fpsimd_cpu = smp_processor_id();
 }
 
 /*
@@ -1043,7 +1044,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 
 	local_bh_disable();
 
-	current->thread.fpsimd_state.user_fpsimd = *state;
+	current->thread.uw.fpsimd_state = *state;
 	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
 
@@ -1060,7 +1061,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
  */
 void fpsimd_flush_task_state(struct task_struct *t)
 {
-	t->thread.fpsimd_state.cpu = NR_CPUS;
+	t->thread.fpsimd_cpu = NR_CPUS;
 }
 
 static inline void fpsimd_flush_cpu_state(void)
@@ -1159,7 +1160,7 @@ EXPORT_SYMBOL(kernel_neon_end);
 
 #ifdef CONFIG_EFI
 
-static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state);
+static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
 static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
 static DEFINE_PER_CPU(bool, efi_sve_state_used);
 
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 47080c49cc7e..f0e6ab8abe9c 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -117,53 +117,42 @@ u64 __init kaslr_early_init(u64 dt_phys)
 	/*
 	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
 	 * kernel image offset from the seed. Let's place the kernel in the
-	 * lower half of the VMALLOC area (VA_BITS - 2).
+	 * middle half of the VMALLOC area (VA_BITS - 2), and stay clear of
+	 * the lower and upper quarters to avoid colliding with other
+	 * allocations.
 	 * Even if we could randomize at page granularity for 16k and 64k pages,
 	 * let's always round to 2 MB so we don't interfere with the ability to
 	 * map using contiguous PTEs
 	 */
 	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
-	offset = seed & mask;
+	offset = BIT(VA_BITS - 3) + (seed & mask);
 
 	/* use the top 16 bits to randomize the linear region */
 	memstart_offset_seed = seed >> 48;
 
-	/*
-	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
-	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
-	 * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT).
-	 *
-	 * NOTE: The references to _text and _end below will already take the
-	 *       modulo offset (the physical displacement modulo 2 MB) into
-	 *       account, given that the physical placement is controlled by
-	 *       the loader, and will not change as a result of the virtual
-	 *       mapping we choose.
-	 */
-	if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
-	    (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
-		offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT);
-
 	if (IS_ENABLED(CONFIG_KASAN))
 		/*
 		 * KASAN does not expect the module region to intersect the
 		 * vmalloc region, since shadow memory is allocated for each
 		 * module at load time, whereas the vmalloc region is shadowed
 		 * by KASAN zero pages. So keep modules out of the vmalloc
-		 * region if KASAN is enabled.
+		 * region if KASAN is enabled, and put the kernel well within
+		 * 4 GB of the module region.
 		 */
-		return offset;
+		return offset % SZ_2G;
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
 		/*
-		 * Randomize the module region independently from the core
-		 * kernel. This prevents modules from leaking any information
+		 * Randomize the module region over a 4 GB window covering the
+		 * kernel. This reduces the risk of modules leaking information
 		 * about the address of the kernel itself, but results in
 		 * branches between modules and the core kernel that are
 		 * resolved via PLTs. (Branches between modules will be
 		 * resolved normally.)
 		 */
-		module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
-		module_alloc_base = VMALLOC_START;
+		module_range = SZ_4G - (u64)(_end - _stext);
+		module_alloc_base = max((u64)_end + offset - SZ_4G,
+					(u64)MODULES_VADDR);
 	} else {
 		/*
 		 * Randomize the module region by setting module_alloc_base to
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 2122cd187f19..a20de58061a8 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -138,14 +138,25 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 void
 sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 {
-	struct pt_regs *thread_regs;
+	struct cpu_context *cpu_context = &task->thread.cpu_context;
 
 	/* Initialize to zero */
 	memset((char *)gdb_regs, 0, NUMREGBYTES);
-	thread_regs = task_pt_regs(task);
-	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
-	/* Special case for PSTATE (check comments in asm/kgdb.h for details) */
-	dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs);
+
+	gdb_regs[19] = cpu_context->x19;
+	gdb_regs[20] = cpu_context->x20;
+	gdb_regs[21] = cpu_context->x21;
+	gdb_regs[22] = cpu_context->x22;
+	gdb_regs[23] = cpu_context->x23;
+	gdb_regs[24] = cpu_context->x24;
+	gdb_regs[25] = cpu_context->x25;
+	gdb_regs[26] = cpu_context->x26;
+	gdb_regs[27] = cpu_context->x27;
+	gdb_regs[28] = cpu_context->x28;
+	gdb_regs[29] = cpu_context->fp;
+
+	gdb_regs[31] = cpu_context->sp;
+	gdb_regs[32] = cpu_context->pc;
 }
 
 void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index ea640f92fe5a..fa3637284a3d 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -36,11 +36,53 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 		return (u64)&plt[i - 1];
 
 	pltsec->plt_num_entries++;
-	BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
+	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
+		return 0;
 
 	return (u64)&plt[i];
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_843419
+u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
+{
+	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+							  &mod->arch.init;
+	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+	int i = pltsec->plt_num_entries++;
+	u32 mov0, mov1, mov2, br;
+	int rd;
+
+	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
+		return 0;
+
+	/* get the destination register of the ADRP instruction */
+	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
+					  le32_to_cpup((__le32 *)loc));
+
+	/* generate the veneer instructions */
+	mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_INVERSE);
+	mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
+					 AARCH64_INSN_BRANCH_NOLINK);
+
+	plt[i] = (struct plt_entry){
+			cpu_to_le32(mov0),
+			cpu_to_le32(mov1),
+			cpu_to_le32(mov2),
+			cpu_to_le32(br)
+		};
+
+	return (u64)&plt[i];
+}
+#endif
+
 #define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
 
 static int cmp_rela(const void *a, const void *b)
@@ -68,16 +110,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
 }
 
 static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
-			       Elf64_Word dstidx)
+			       Elf64_Word dstidx, Elf_Shdr *dstsec)
 {
 	unsigned int ret = 0;
 	Elf64_Sym *s;
 	int i;
 
 	for (i = 0; i < num; i++) {
+		u64 min_align;
+
 		switch (ELF64_R_TYPE(rela[i].r_info)) {
 		case R_AARCH64_JUMP26:
 		case R_AARCH64_CALL26:
+			if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+				break;
+
 			/*
 			 * We only have to consider branch targets that resolve
 			 * to symbols that are defined in a different section.
@@ -109,6 +156,41 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
 			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
 				ret++;
 			break;
+		case R_AARCH64_ADR_PREL_PG_HI21_NC:
+		case R_AARCH64_ADR_PREL_PG_HI21:
+			if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
+			    !cpus_have_const_cap(ARM64_WORKAROUND_843419))
+				break;
+
+			/*
+			 * Determine the minimal safe alignment for this ADRP
+			 * instruction: the section alignment at which it is
+			 * guaranteed not to appear at a vulnerable offset.
+			 *
+			 * This comes down to finding the least significant zero
+			 * bit in bits [11:3] of the section offset, and
+			 * increasing the section's alignment so that the
+			 * resulting address of this instruction is guaranteed
+			 * to equal the offset in that particular bit (as well
+			 * as all less signficant bits). This ensures that the
+			 * address modulo 4 KB != 0xfff8 or 0xfffc (which would
+			 * have all ones in bits [11:3])
+			 */
+			min_align = 2ULL << ffz(rela[i].r_offset | 0x7);
+
+			/*
+			 * Allocate veneer space for each ADRP that may appear
+			 * at a vulnerable offset nonetheless. At relocation
+			 * time, some of these will remain unused since some
+			 * ADRP instructions can be patched to ADR instructions
+			 * instead.
+			 */
+			if (min_align > SZ_4K)
+				ret++;
+			else
+				dstsec->sh_addralign = max(dstsec->sh_addralign,
+							   min_align);
+			break;
 		}
 	}
 	return ret;
@@ -166,10 +248,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 
 		if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
 			core_plts += count_plts(syms, rels, numrels,
-						sechdrs[i].sh_info);
+						sechdrs[i].sh_info, dstsec);
 		else
 			init_plts += count_plts(syms, rels, numrels,
-						sechdrs[i].sh_info);
+						sechdrs[i].sh_info, dstsec);
 	}
 
 	mod->arch.core.plt->sh_type = SHT_NOBITS;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f469e0435903..719fde8dcc19 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -55,9 +55,10 @@ void *module_alloc(unsigned long size)
 		 * less likely that the module region gets exhausted, so we
 		 * can simply omit this fallback in that case.
 		 */
-		p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
-				VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
-				NUMA_NO_NODE, __builtin_return_address(0));
+		p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+				module_alloc_base + SZ_4G, GFP_KERNEL,
+				PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				__builtin_return_address(0));
 
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
@@ -197,6 +198,34 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 	return 0;
 }
 
+static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
+{
+	u32 insn;
+
+	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
+	    !cpus_have_const_cap(ARM64_WORKAROUND_843419) ||
+	    ((u64)place & 0xfff) < 0xff8)
+		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
+				      AARCH64_INSN_IMM_ADR);
+
+	/* patch ADRP to ADR if it is in range */
+	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
+			    AARCH64_INSN_IMM_ADR)) {
+		insn = le32_to_cpu(*place);
+		insn &= ~BIT(31);
+	} else {
+		/* out of range for ADR -> emit a veneer */
+		val = module_emit_adrp_veneer(mod, place, val & ~0xfff);
+		if (!val)
+			return -ENOEXEC;
+		insn = aarch64_insn_gen_branch_imm((u64)place, val,
+						   AARCH64_INSN_BRANCH_NOLINK);
+	}
+
+	*place = cpu_to_le32(insn);
+	return 0;
+}
+
 int apply_relocate_add(Elf64_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
@@ -336,14 +365,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
-#ifndef CONFIG_ARM64_ERRATUM_843419
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 		case R_AARCH64_ADR_PREL_PG_HI21:
-			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
-					     AARCH64_INSN_IMM_ADR);
+			ovf = reloc_insn_adrp(me, loc, val);
+			if (ovf && ovf != -ERANGE)
+				return ovf;
 			break;
-#endif
 		case R_AARCH64_ADD_ABS_LO12_NC:
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
@@ -386,6 +414,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
 			    ovf == -ERANGE) {
 				val = module_emit_plt_entry(me, loc, &rel[i], sym);
+				if (!val)
+					return -ENOEXEC;
 				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
 						     26, AARCH64_INSN_IMM_26);
 			}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c0da6efe5465..f08a2ed9db0d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -257,7 +257,7 @@ static void tls_thread_flush(void)
 	write_sysreg(0, tpidr_el0);
 
 	if (is_compat_task()) {
-		current->thread.tp_value = 0;
+		current->thread.uw.tp_value = 0;
 
 		/*
 		 * We need to ensure ordering between the shadow state and the
@@ -351,7 +351,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		 * for the new thread.
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			p->thread.tp_value = childregs->regs[3];
+			p->thread.uw.tp_value = childregs->regs[3];
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
@@ -379,7 +379,7 @@ static void tls_thread_switch(struct task_struct *next)
 	tls_preserve_current_state();
 
 	if (is_compat_thread(task_thread_info(next)))
-		write_sysreg(next->thread.tp_value, tpidrro_el0);
+		write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
 	else if (!arm64_kernel_unmapped_at_el0())
 		write_sysreg(0, tpidrro_el0);
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9ae31f7e2243..71d99af24ef2 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -209,7 +209,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
 		force_sig_ptrace_errno_trap(si_errno, (void __user *)bkpt->trigger);
 	}
 #endif
-	force_sig_info(SIGTRAP, &info, current);
+	arm64_force_sig_info(&info, "Hardware breakpoint trap (ptrace)", current);
 }
 
 /*
@@ -629,7 +629,7 @@ static int __fpr_get(struct task_struct *target,
 
 	sve_sync_to_fpsimd(target);
 
-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.uw.fpsimd_state;
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
 				   start_pos, start_pos + sizeof(*uregs));
@@ -655,19 +655,19 @@ static int __fpr_set(struct task_struct *target,
 	struct user_fpsimd_state newstate;
 
 	/*
-	 * Ensure target->thread.fpsimd_state is up to date, so that a
+	 * Ensure target->thread.uw.fpsimd_state is up to date, so that a
 	 * short copyin can't resurrect stale data.
 	 */
 	sve_sync_to_fpsimd(target);
 
-	newstate = target->thread.fpsimd_state.user_fpsimd;
+	newstate = target->thread.uw.fpsimd_state;
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate,
 				 start_pos, start_pos + sizeof(newstate));
 	if (ret)
 		return ret;
 
-	target->thread.fpsimd_state.user_fpsimd = newstate;
+	target->thread.uw.fpsimd_state = newstate;
 
 	return ret;
 }
@@ -692,7 +692,7 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count,
 		   void *kbuf, void __user *ubuf)
 {
-	unsigned long *tls = &target->thread.tp_value;
+	unsigned long *tls = &target->thread.uw.tp_value;
 
 	if (target == current)
 		tls_preserve_current_state();
@@ -705,13 +705,13 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
 		   const void *kbuf, const void __user *ubuf)
 {
 	int ret;
-	unsigned long tls = target->thread.tp_value;
+	unsigned long tls = target->thread.uw.tp_value;
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
 	if (ret)
 		return ret;
 
-	target->thread.tp_value = tls;
+	target->thread.uw.tp_value = tls;
 	return ret;
 }
 
@@ -842,7 +842,7 @@ static int sve_get(struct task_struct *target,
 	start = end;
 	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
 	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpsimd_state.fpsr,
+				  &target->thread.uw.fpsimd_state.fpsr,
 				  start, end);
 	if (ret)
 		return ret;
@@ -941,7 +941,7 @@ static int sve_set(struct task_struct *target,
 	start = end;
 	end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE;
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				 &target->thread.fpsimd_state.fpsr,
+				 &target->thread.uw.fpsimd_state.fpsr,
 				 start, end);
 
 out:
@@ -1169,7 +1169,7 @@ static int compat_vfp_get(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;
 
-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.uw.fpsimd_state;
 
 	if (target == current)
 		fpsimd_preserve_current_state();
@@ -1202,7 +1202,7 @@ static int compat_vfp_set(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret, vregs_end_pos;
 
-	uregs = &target->thread.fpsimd_state.user_fpsimd;
+	uregs = &target->thread.uw.fpsimd_state;
 
 	vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
@@ -1225,7 +1225,7 @@ static int compat_tls_get(struct task_struct *target,
 			  const struct user_regset *regset, unsigned int pos,
 			  unsigned int count, void *kbuf, void __user *ubuf)
 {
-	compat_ulong_t tls = (compat_ulong_t)target->thread.tp_value;
+	compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value;
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
 }
 
@@ -1235,13 +1235,13 @@ static int compat_tls_set(struct task_struct *target,
 			  const void __user *ubuf)
 {
 	int ret;
-	compat_ulong_t tls = target->thread.tp_value;
+	compat_ulong_t tls = target->thread.uw.tp_value;
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
 	if (ret)
 		return ret;
 
-	target->thread.tp_value = tls;
+	target->thread.uw.tp_value = tls;
 	return ret;
 }
 
@@ -1538,7 +1538,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			break;
 
 		case COMPAT_PTRACE_GET_THREAD_AREA:
-			ret = put_user((compat_ulong_t)child->thread.tp_value,
+			ret = put_user((compat_ulong_t)child->thread.uw.tp_value,
 				       (compat_ulong_t __user *)datap);
 			break;
 
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index c124752a8bd3..5915ce5759cc 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -28,6 +28,7 @@ asmlinkage u64 absolute_data16(void);
 asmlinkage u64 signed_movw(void);
 asmlinkage u64 unsigned_movw(void);
 asmlinkage u64 relative_adrp(void);
+asmlinkage u64 relative_adrp_far(void);
 asmlinkage u64 relative_adr(void);
 asmlinkage u64 relative_data64(void);
 asmlinkage u64 relative_data32(void);
@@ -43,9 +44,8 @@ static struct {
 	{ "R_AARCH64_ABS16",		absolute_data16, UL(SYM16_ABS_VAL) },
 	{ "R_AARCH64_MOVW_SABS_Gn",	signed_movw, UL(SYM64_ABS_VAL) },
 	{ "R_AARCH64_MOVW_UABS_Gn",	unsigned_movw, UL(SYM64_ABS_VAL) },
-#ifndef CONFIG_ARM64_ERRATUM_843419
 	{ "R_AARCH64_ADR_PREL_PG_HI21",	relative_adrp, (u64)&sym64_rel },
-#endif
+	{ "R_AARCH64_ADR_PREL_PG_HI21",	relative_adrp_far, (u64)&memstart_addr },
 	{ "R_AARCH64_ADR_PREL_LO21",	relative_adr, (u64)&sym64_rel },
 	{ "R_AARCH64_PREL64",		relative_data64, (u64)&sym64_rel },
 	{ "R_AARCH64_PREL32",		relative_data32, (u64)&sym64_rel },
diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S
index e1edcefeb02d..2b8d9cb8b078 100644
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
@@ -43,15 +43,21 @@ ENTRY(unsigned_movw)
 	ret
 ENDPROC(unsigned_movw)
 
-#ifndef CONFIG_ARM64_ERRATUM_843419
-
+	.align	12
+	.space	0xff8
 ENTRY(relative_adrp)
 	adrp	x0, sym64_rel
 	add	x0, x0, #:lo12:sym64_rel
 	ret
 ENDPROC(relative_adrp)
 
-#endif
+	.align	12
+	.space	0xffc
+ENTRY(relative_adrp_far)
+	adrp	x0, memstart_addr
+	add	x0, x0, #:lo12:memstart_addr
+	ret
+ENDPROC(relative_adrp_far)
 
 ENTRY(relative_adr)
 	adr	x0, sym64_rel
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f60c052e8d1c..154b7d30145d 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -40,6 +40,7 @@
 #include <asm/fpsimd.h>
 #include <asm/ptrace.h>
 #include <asm/signal32.h>
+#include <asm/traps.h>
 #include <asm/vdso.h>
 
 /*
@@ -179,7 +180,7 @@ static void __user *apply_user_offset(
 static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 {
 	struct user_fpsimd_state const *fpsimd =
-		&current->thread.fpsimd_state.user_fpsimd;
+		&current->thread.uw.fpsimd_state;
 	int err;
 
 	/* copy the FP and status/control registers */
@@ -565,11 +566,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	return regs->regs[0];
 
 badframe:
-	if (show_unhandled_signals)
-		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
-				    current->comm, task_pid_nr(current), __func__,
-				    regs->pc, regs->sp);
-	force_sig(SIGSEGV, current);
+	arm64_notify_segfault(regs->sp);
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 79feb861929b..77b91f478995 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -26,6 +26,7 @@
 #include <asm/esr.h>
 #include <asm/fpsimd.h>
 #include <asm/signal32.h>
+#include <asm/traps.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 
@@ -149,7 +150,7 @@ union __fpsimd_vreg {
 static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 {
 	struct user_fpsimd_state const *fpsimd =
-		&current->thread.fpsimd_state.user_fpsimd;
+		&current->thread.uw.fpsimd_state;
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr, fpexc;
@@ -307,11 +308,7 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs)
 	return regs->regs[0];
 
 badframe:
-	if (show_unhandled_signals)
-		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
-				    current->comm, task_pid_nr(current), __func__,
-				    regs->pc, regs->compat_sp);
-	force_sig(SIGSEGV, current);
+	arm64_notify_segfault(regs->compat_sp);
 	return 0;
 }
 
@@ -344,11 +341,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs)
 	return regs->regs[0];
 
 badframe:
-	if (show_unhandled_signals)
-		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n",
-				    current->comm, task_pid_nr(current), __func__,
-				    regs->pc, regs->compat_sp);
-	force_sig(SIGSEGV, current);
+	arm64_notify_segfault(regs->compat_sp);
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b8ad7be9c33..f3e2e3aec0b0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -85,43 +85,6 @@ enum ipi_msg_type {
 	IPI_WAKEUP
 };
 
-#ifdef CONFIG_ARM64_VHE
-
-/* Whether the boot CPU is running in HYP mode or not*/
-static bool boot_cpu_hyp_mode;
-
-static inline void save_boot_cpu_run_el(void)
-{
-	boot_cpu_hyp_mode = is_kernel_in_hyp_mode();
-}
-
-static inline bool is_boot_cpu_in_hyp_mode(void)
-{
-	return boot_cpu_hyp_mode;
-}
-
-/*
- * Verify that a secondary CPU is running the kernel at the same
- * EL as that of the boot CPU.
- */
-void verify_cpu_run_el(void)
-{
-	bool in_el2 = is_kernel_in_hyp_mode();
-	bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode();
-
-	if (in_el2 ^ boot_cpu_el2) {
-		pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n",
-					smp_processor_id(),
-					in_el2 ? 2 : 1,
-					boot_cpu_el2 ? 2 : 1);
-		cpu_panic_kernel();
-	}
-}
-
-#else
-static inline void save_boot_cpu_run_el(void) {}
-#endif
-
 #ifdef CONFIG_HOTPLUG_CPU
 static int op_cpu_kill(unsigned int cpu);
 #else
@@ -447,13 +410,6 @@ void __init smp_prepare_boot_cpu(void)
 	 */
 	jump_label_init();
 	cpuinfo_store_boot_cpu();
-	save_boot_cpu_run_el();
-	/*
-	 * Run the errata work around checks on the boot CPU, once we have
-	 * initialised the cpu feature infrastructure from
-	 * cpuinfo_store_boot_cpu() above.
-	 */
-	update_cpu_errata_workarounds();
 }
 
 static u64 __init of_get_cpu_mpidr(struct device_node *dn)
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index a382b2a1b84e..93ab57dcfc14 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -27,6 +27,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/cacheflush.h>
+#include <asm/system_misc.h>
 #include <asm/unistd.h>
 
 static long
@@ -67,6 +68,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
  */
 long compat_arm_syscall(struct pt_regs *regs)
 {
+	siginfo_t info;
 	unsigned int no = regs->regs[7];
 
 	switch (no) {
@@ -88,7 +90,7 @@ long compat_arm_syscall(struct pt_regs *regs)
 		return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
 
 	case __ARM_NR_compat_set_tls:
-		current->thread.tp_value = regs->regs[0];
+		current->thread.uw.tp_value = regs->regs[0];
 
 		/*
 		 * Protect against register corruption from context switch.
@@ -99,6 +101,23 @@ long compat_arm_syscall(struct pt_regs *regs)
 		return 0;
 
 	default:
-		return -ENOSYS;
+		/*
+		 * Calls 9f00xx..9f07ff are defined to return -ENOSYS
+		 * if not implemented, rather than raising SIGILL. This
+		 * way the calling program can gracefully determine whether
+		 * a feature is supported.
+		 */
+		if ((no & 0xffff) <= 0x7ff)
+			return -ENOSYS;
+		break;
 	}
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLTRP;
+	info.si_addr  = (void __user *)instruction_pointer(regs) -
+			 (compat_thumb_mode(regs) ? 2 : 4);
+
+	arm64_notify_die("Oops - bad compat syscall(2)", regs, &info, no);
+	return 0;
 }
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index eb2d15147e8d..ba964da31a25 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
 
 #include <asm/atomic.h>
 #include <asm/bug.h>
+#include <asm/cpufeature.h>
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
@@ -223,13 +224,46 @@ void die(const char *str, struct pt_regs *regs, int err)
 		do_exit(SIGSEGV);
 }
 
+static bool show_unhandled_signals_ratelimited(void)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	return show_unhandled_signals && __ratelimit(&rs);
+}
+
+void arm64_force_sig_info(struct siginfo *info, const char *str,
+			  struct task_struct *tsk)
+{
+	unsigned int esr = tsk->thread.fault_code;
+	struct pt_regs *regs = task_pt_regs(tsk);
+
+	if (!unhandled_signal(tsk, info->si_signo))
+		goto send_sig;
+
+	if (!show_unhandled_signals_ratelimited())
+		goto send_sig;
+
+	pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk));
+	if (esr)
+		pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr);
+
+	pr_cont("%s", str);
+	print_vma_addr(KERN_CONT " in ", regs->pc);
+	pr_cont("\n");
+	__show_regs(regs);
+
+send_sig:
+	force_sig_info(info->si_signo, info, tsk);
+}
+
 void arm64_notify_die(const char *str, struct pt_regs *regs,
 		      struct siginfo *info, int err)
 {
 	if (user_mode(regs)) {
+		WARN_ON(regs != current_pt_regs());
 		current->thread.fault_address = 0;
 		current->thread.fault_code = err;
-		force_sig_info(info->si_signo, info, current);
+		arm64_force_sig_info(info, str, current);
 	} else {
 		die(str, regs, err);
 	}
@@ -311,12 +345,13 @@ exit:
 	return fn ? fn(regs, instr) : 1;
 }
 
-void force_signal_inject(int signal, int code, struct pt_regs *regs,
-			 unsigned long address)
+void force_signal_inject(int signal, int code, unsigned long address)
 {
 	siginfo_t info;
-	void __user *pc = (void __user *)instruction_pointer(regs);
 	const char *desc;
+	struct pt_regs *regs = current_pt_regs();
+
+	clear_siginfo(&info);
 
 	switch (signal) {
 	case SIGILL:
@@ -330,17 +365,16 @@ void force_signal_inject(int signal, int code, struct pt_regs *regs,
 		break;
 	}
 
-	if (unhandled_signal(current, signal) &&
-	    show_unhandled_signals_ratelimited()) {
-		pr_info("%s[%d]: %s: pc=%p\n",
-			current->comm, task_pid_nr(current), desc, pc);
-		dump_instr(KERN_INFO, regs);
+	/* Force signals we don't understand to SIGKILL */
+	if (WARN_ON(signal != SIGKILL ||
+		    siginfo_layout(signal, code) != SIL_FAULT)) {
+		signal = SIGKILL;
 	}
 
 	info.si_signo = signal;
 	info.si_errno = 0;
 	info.si_code  = code;
-	info.si_addr  = pc;
+	info.si_addr  = (void __user *)address;
 
 	arm64_notify_die(desc, regs, &info, 0);
 }
@@ -348,7 +382,7 @@ void force_signal_inject(int signal, int code, struct pt_regs *regs,
 /*
  * Set up process info to signal segmentation fault - called on access error.
  */
-void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr)
+void arm64_notify_segfault(unsigned long addr)
 {
 	int code;
 
@@ -359,7 +393,7 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr)
 		code = SEGV_ACCERR;
 	up_read(&current->mm->mmap_sem);
 
-	force_signal_inject(SIGSEGV, code, regs, addr);
+	force_signal_inject(SIGSEGV, code, addr);
 }
 
 asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
@@ -371,13 +405,12 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	if (call_undef_hook(regs) == 0)
 		return;
 
-	force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 }
 
-int cpu_enable_cache_maint_trap(void *__unused)
+void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 {
 	config_sctlr_el1(SCTLR_EL1_UCI, 0);
-	return 0;
 }
 
 #define __user_cache_maint(insn, address, res)			\
@@ -426,12 +459,12 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 		__user_cache_maint("ic ivau", address, ret);
 		break;
 	default:
-		force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
 		return;
 	}
 
 	if (ret)
-		arm64_notify_segfault(regs, address);
+		arm64_notify_segfault(address);
 	else
 		arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
 }
@@ -600,11 +633,6 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 {
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
-	console_verbose();
-
-	pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n",
-		smp_processor_id(), esr, esr_get_class_string(esr));
-	__show_regs(regs);
 
 	info.si_signo = SIGILL;
 	info.si_errno = 0;
@@ -612,9 +640,9 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 	info.si_addr  = pc;
 
 	current->thread.fault_address = 0;
-	current->thread.fault_code = 0;
+	current->thread.fault_code = esr;
 
-	force_sig_info(info.si_signo, &info, current);
+	arm64_force_sig_info(&info, "Bad EL0 synchronous exception", current);
 }
 
 #ifdef CONFIG_VMAP_STACK
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 4e696f96451f..0ead8a1d1679 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -17,6 +17,7 @@ CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
-		   -fcall-saved-x18
+		   -fcall-saved-x18 -fomit-frame-pointer
+CFLAGS_REMOVE_atomic_ll_sc.o := -pg
 
 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 758bde7e2fa6..30334d81b021 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -50,6 +50,10 @@ ENTRY(flush_icache_range)
  */
 ENTRY(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
+alternative_if ARM64_HAS_CACHE_IDC
+	dsb	ishst
+	b	7f
+alternative_else_nop_endif
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x0, x3
@@ -60,8 +64,13 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	b.lo	1b
 	dsb	ish
 
+7:
+alternative_if ARM64_HAS_CACHE_DIC
+	isb
+	b	8f
+alternative_else_nop_endif
 	invalidate_icache_by_line x0, x1, x2, x3, 9f
-	mov	x0, #0
+8:	mov	x0, #0
 1:
 	uaccess_ttbr0_disable x1, x2
 	ret
@@ -80,6 +89,12 @@ ENDPROC(__flush_cache_user_range)
  *	- end     - virtual end address of region
  */
 ENTRY(invalidate_icache_range)
+alternative_if ARM64_HAS_CACHE_DIC
+	mov	x0, xzr
+	isb
+	ret
+alternative_else_nop_endif
+
 	uaccess_ttbr0_enable x2, x3, x4
 
 	invalidate_icache_by_line x0, x1, x2, x3, 2f
@@ -116,6 +131,10 @@ ENDPIPROC(__flush_dcache_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pou)
+alternative_if ARM64_HAS_CACHE_IDC
+	dsb	ishst
+	ret
+alternative_else_nop_endif
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
 ENDPROC(__clean_dcache_area_pou)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index bff11553eb05..4165485e8b6e 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -43,6 +43,7 @@
 #include <asm/system_misc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
+#include <asm/traps.h>
 
 #include <acpi/ghes.h>
 
@@ -289,58 +290,31 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
 	do_exit(SIGKILL);
 }
 
-static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
-			    unsigned int esr, unsigned int sig, int code,
-			    struct pt_regs *regs, int fault)
+static void __do_user_fault(struct siginfo *info, unsigned int esr)
 {
-	struct siginfo si;
-	const struct fault_info *inf;
-	unsigned int lsb = 0;
-
-	if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
-		inf = esr_to_fault_info(esr);
-		pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
-			tsk->comm, task_pid_nr(tsk), inf->name, sig,
-			addr, esr);
-		print_vma_addr(KERN_CONT ", in ", regs->pc);
-		pr_cont("\n");
-		__show_regs(regs);
-	}
-
-	tsk->thread.fault_address = addr;
-	tsk->thread.fault_code = esr;
-	si.si_signo = sig;
-	si.si_errno = 0;
-	si.si_code = code;
-	si.si_addr = (void __user *)addr;
-	/*
-	 * Either small page or large page may be poisoned.
-	 * In other words, VM_FAULT_HWPOISON_LARGE and
-	 * VM_FAULT_HWPOISON are mutually exclusive.
-	 */
-	if (fault & VM_FAULT_HWPOISON_LARGE)
-		lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
-	else if (fault & VM_FAULT_HWPOISON)
-		lsb = PAGE_SHIFT;
-	si.si_addr_lsb = lsb;
-
-	force_sig_info(sig, &si, tsk);
+	current->thread.fault_address = (unsigned long)info->si_addr;
+	current->thread.fault_code = esr;
+	arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current);
 }
 
 static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
-	struct task_struct *tsk = current;
-	const struct fault_info *inf;
-
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
 	 */
 	if (user_mode(regs)) {
-		inf = esr_to_fault_info(esr);
-		__do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
-	} else
+		const struct fault_info *inf = esr_to_fault_info(esr);
+		struct siginfo si = {
+			.si_signo	= inf->sig,
+			.si_code	= inf->code,
+			.si_addr	= (void __user *)addr,
+		};
+
+		__do_user_fault(&si, esr);
+	} else {
 		__do_kernel_fault(addr, esr, regs);
+	}
 }
 
 #define VM_FAULT_BADMAP		0x010000
@@ -393,7 +367,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 {
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	int fault, sig, code, major = 0;
+	struct siginfo si;
+	int fault, major = 0;
 	unsigned long vm_flags = VM_READ | VM_WRITE;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
@@ -525,27 +500,37 @@ retry:
 		return 0;
 	}
 
+	clear_siginfo(&si);
+	si.si_addr = (void __user *)addr;
+
 	if (fault & VM_FAULT_SIGBUS) {
 		/*
 		 * We had some memory, but were unable to successfully fix up
 		 * this page fault.
 		 */
-		sig = SIGBUS;
-		code = BUS_ADRERR;
-	} else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
-		sig = SIGBUS;
-		code = BUS_MCEERR_AR;
+		si.si_signo	= SIGBUS;
+		si.si_code	= BUS_ADRERR;
+	} else if (fault & VM_FAULT_HWPOISON_LARGE) {
+		unsigned int hindex = VM_FAULT_GET_HINDEX(fault);
+
+		si.si_signo	= SIGBUS;
+		si.si_code	= BUS_MCEERR_AR;
+		si.si_addr_lsb	= hstate_index_to_shift(hindex);
+	} else if (fault & VM_FAULT_HWPOISON) {
+		si.si_signo	= SIGBUS;
+		si.si_code	= BUS_MCEERR_AR;
+		si.si_addr_lsb	= PAGE_SHIFT;
 	} else {
 		/*
 		 * Something tried to access memory that isn't in our memory
 		 * map.
 		 */
-		sig = SIGSEGV;
-		code = fault == VM_FAULT_BADACCESS ?
-			SEGV_ACCERR : SEGV_MAPERR;
+		si.si_signo	= SIGSEGV;
+		si.si_code	= fault == VM_FAULT_BADACCESS ?
+				  SEGV_ACCERR : SEGV_MAPERR;
 	}
 
-	__do_user_fault(tsk, addr, esr, sig, code, regs, fault);
+	__do_user_fault(&si, esr);
 	return 0;
 
 no_context:
@@ -582,8 +567,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	const struct fault_info *inf;
 
 	inf = esr_to_fault_info(esr);
-	pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
-		inf->name, esr, addr);
 
 	/*
 	 * Synchronous aborts may interrupt code which had interrupts masked.
@@ -600,83 +583,83 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 			nmi_exit();
 	}
 
-	info.si_signo = SIGBUS;
+	info.si_signo = inf->sig;
 	info.si_errno = 0;
-	info.si_code  = BUS_FIXME;
+	info.si_code  = inf->code;
 	if (esr & ESR_ELx_FnV)
 		info.si_addr = NULL;
 	else
 		info.si_addr  = (void __user *)addr;
-	arm64_notify_die("", regs, &info, esr);
+	arm64_notify_die(inf->name, regs, &info, esr);
 
 	return 0;
 }
 
 static const struct fault_info fault_info[] = {
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"ttbr address size fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 1 address size fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 2 address size fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"level 3 address size fault"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"level 2 address size fault"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"level 3 address size fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 8"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 8"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 12"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 12"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"synchronous external abort"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 17"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 18"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 19"			},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 0 (translation table walk)"	},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 1 (translation table walk)"	},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 2 (translation table walk)"	},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 3 (translation table walk)"	},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"synchronous parity or ECC error" },	// Reserved when RAS is implemented
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 25"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 26"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 27"			},
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 0 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 1 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 2 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
-	{ do_sea,		SIGBUS,  BUS_FIXME,	"level 3 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 32"			},
+	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous external abort"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 17"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 18"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 19"			},
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 (translation table walk)"	},
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 1 (translation table walk)"	},
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 2 (translation table walk)"	},
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 3 (translation table walk)"	},
+	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous parity or ECC error" },	// Reserved when RAS is implemented
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 25"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 26"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 27"			},
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 1 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 2 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_sea,		SIGKILL, SI_KERNEL,	"level 3 synchronous parity error (translation table walk)"	},	// Reserved when RAS is implemented
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 32"			},
 	{ do_alignment_fault,	SIGBUS,  BUS_ADRALN,	"alignment fault"		},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 34"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 35"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 36"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 37"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 38"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 39"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 40"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 41"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 42"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 43"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 44"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 45"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 46"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 47"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"TLB conflict abort"		},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"Unsupported atomic hardware update fault"	},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 50"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 51"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"implementation fault (lockdown abort)" },
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"implementation fault (unsupported exclusive)" },
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 54"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 55"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 56"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 57"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 58" 			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 59"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 60"			},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"section domain fault"		},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"page domain fault"		},
-	{ do_bad,		SIGBUS,  BUS_FIXME,	"unknown 63"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 34"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 35"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 36"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 37"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 38"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 39"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 40"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 41"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 42"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 43"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 44"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 45"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 46"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 47"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"TLB conflict abort"		},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"Unsupported atomic hardware update fault"	},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 50"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 51"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"implementation fault (lockdown abort)" },
+	{ do_bad,		SIGBUS,  BUS_OBJERR,	"implementation fault (unsupported exclusive)" },
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 54"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 55"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 56"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 57"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 58" 			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 59"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 60"			},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"section domain fault"		},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"page domain fault"		},
+	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 63"			},
 };
 
 int handle_guest_sea(phys_addr_t addr, unsigned int esr)
@@ -698,19 +681,17 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 	if (!inf->fn(addr, esr, regs))
 		return;
 
-	pr_alert("Unhandled fault: %s at 0x%016lx\n",
-		 inf->name, addr);
-
-	mem_abort_decode(esr);
-
-	if (!user_mode(regs))
+	if (!user_mode(regs)) {
+		pr_alert("Unhandled fault at 0x%016lx\n", addr);
+		mem_abort_decode(esr);
 		show_pte(addr);
+	}
 
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
 	info.si_addr  = (void __user *)addr;
-	arm64_notify_die("", regs, &info, esr);
+	arm64_notify_die(inf->name, regs, &info, esr);
 }
 
 asmlinkage void __exception do_el0_irq_bp_hardening(void)
@@ -741,7 +722,6 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 					   struct pt_regs *regs)
 {
 	struct siginfo info;
-	struct task_struct *tsk = current;
 
 	if (user_mode(regs)) {
 		if (instruction_pointer(regs) > TASK_SIZE)
@@ -749,17 +729,11 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
 		local_irq_enable();
 	}
 
-	if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
-		pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
-				    tsk->comm, task_pid_nr(tsk),
-				    esr_get_class_string(esr), (void *)regs->pc,
-				    (void *)regs->sp);
-
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code  = BUS_ADRALN;
 	info.si_addr  = (void __user *)addr;
-	arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr);
+	arm64_notify_die("SP/PC alignment exception", regs, &info, esr);
 }
 
 int __init early_brk64(unsigned long addr, unsigned int esr,
@@ -774,11 +748,11 @@ static struct fault_info __refdata debug_fault_info[] = {
 	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	},
 	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	},
 	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	},
-	{ do_bad,	SIGBUS,		BUS_FIXME,	"unknown 3"		},
+	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 3"		},
 	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		},
-	{ do_bad,	SIGTRAP,	TRAP_FIXME,	"aarch32 vector catch"	},
+	{ do_bad,	SIGKILL,	SI_KERNEL,	"aarch32 vector catch"	},
 	{ early_brk64,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		},
-	{ do_bad,	SIGBUS,		BUS_FIXME,	"unknown 7"		},
+	{ do_bad,	SIGKILL,	SI_KERNEL,	"unknown 7"		},
 };
 
 void __init hook_debug_fault_code(int nr,
@@ -814,14 +788,11 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 	if (!inf->fn(addr, esr, regs)) {
 		rv = 1;
 	} else {
-		pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
-			 inf->name, esr, addr);
-
 		info.si_signo = inf->sig;
 		info.si_errno = 0;
 		info.si_code  = inf->code;
 		info.si_addr  = (void __user *)addr;
-		arm64_notify_die("", regs, &info, 0);
+		arm64_notify_die(inf->name, regs, &info, esr);
 		rv = 0;
 	}
 
@@ -833,7 +804,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 NOKPROBE_SYMBOL(do_debug_exception);
 
 #ifdef CONFIG_ARM64_PAN
-int cpu_enable_pan(void *__unused)
+void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
 	/*
 	 * We modify PSTATE. This won't work from irq context as the PSTATE
@@ -843,6 +814,5 @@ int cpu_enable_pan(void *__unused)
 
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 	asm(SET_PSTATE_PAN(1));
-	return 0;
 }
 #endif /* CONFIG_ARM64_PAN */
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c0af47617299..5f9a73a4452c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -36,6 +36,12 @@
 #define TCR_TG_FLAGS	TCR_TG0_4K | TCR_TG1_4K
 #endif
 
+#ifdef CONFIG_RANDOMIZE_BASE
+#define TCR_KASLR_FLAGS	TCR_NFD1
+#else
+#define TCR_KASLR_FLAGS	0
+#endif
+
 #define TCR_SMP_FLAGS	TCR_SHARED
 
 /* PTWs cacheable, inner/outer WBWA */
@@ -432,7 +438,8 @@ ENTRY(__cpu_setup)
 	 * both user and kernel.
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
-			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
+			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+			TCR_TBI0 | TCR_A1
 	tcr_set_idmap_t0sz	x10, x9
 
 	/*
@@ -441,16 +448,15 @@ ENTRY(__cpu_setup)
 	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
-	 * Hardware update of the Access and Dirty bits.
+	 * Enable hardware update of the Access Flags bit.
+	 * Hardware dirty bit management is enabled later,
+	 * via capabilities.
 	 */
 	mrs	x9, ID_AA64MMFR1_EL1
 	and	x9, x9, #0xf
-	cbz	x9, 2f
-	cmp	x9, #2
-	b.lt	1f
-	orr	x10, x10, #TCR_HD		// hardware Dirty flag update
-1:	orr	x10, x10, #TCR_HA		// hardware Access flag update
-2:
+	cbz	x9, 1f
+	orr	x10, x10, #TCR_HA		// hardware Access flag update
+1:
 #endif	/* CONFIG_ARM64_HW_AFDBM */
 	msr	tcr_el1, x10
 	ret					// return to head.S
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 339df7324e9c..9ecad05bfc73 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -27,6 +27,7 @@ config OPENRISC
 	select GENERIC_STRNLEN_USER
 	select GENERIC_SMP_IDLE_THREAD
 	select MODULES_USE_ELF_RELA
+	select MULTI_IRQ_HANDLER
 	select HAVE_DEBUG_STACKOVERFLOW
 	select OR1K_PIC
 	select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
@@ -68,6 +69,9 @@ config STACKTRACE_SUPPORT
 config LOCKDEP_SUPPORT
 	def_bool  y
 
+config MULTI_IRQ_HANDLER
+	def_bool y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 04807c7f64cc..23d8acca5c90 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -33,6 +33,7 @@ config RISCV
 	select MODULES_USE_ELF_RELA if MODULES
 	select THREAD_INFO_IN_TASK
 	select RISCV_TIMER
+	select GENERIC_IRQ_MULTI_HANDLER
 
 config MMU
 	def_bool y
@@ -114,6 +115,9 @@ config ARCH_RV64I
 	select 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 
 endchoice
 
@@ -131,6 +135,10 @@ choice
 		bool "medium any code model"
 endchoice
 
+config MODULE_SECTIONS
+	bool
+	select HAVE_MOD_ARCH_SPECIFIC
+
 choice
 	prompt "Maximum Physical Memory"
 	default MAXPHYSMEM_2GB if 32BIT
@@ -141,6 +149,7 @@ choice
 		bool "2GiB"
 	config MAXPHYSMEM_128GB
 		depends on 64BIT && CMODEL_MEDANY
+		select MODULE_SECTIONS if MODULES
 		bool "128GiB"
 endchoice
 
@@ -281,7 +290,7 @@ config CMDLINE_BOOL
 	  in CONFIG_CMDLINE.
 
 	  The built-in options will be concatenated to the default command
-	  line if CMDLINE_OVERRIDE is set to 'N'. Otherwise, the default
+	  line if CMDLINE_FORCE is set to 'N'. Otherwise, the default
 	  command line will be ignored and replaced by the built-in string.
 
 config CMDLINE
@@ -291,7 +300,7 @@ config CMDLINE
 	help
 	  Supply command-line options at build time by entering them here.
 
-config CMDLINE_OVERRIDE
+config CMDLINE_FORCE
 	bool "Built-in command line overrides bootloader arguments"
 	depends on CMDLINE_BOOL
 	help
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6719dd30ec5b..76e958a5414a 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -11,6 +11,9 @@
 LDFLAGS         :=
 OBJCOPYFLAGS    := -O binary
 LDFLAGS_vmlinux :=
+ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+	LDFLAGS_vmlinux := --no-relax
+endif
 KBUILD_AFLAGS_MODULE += -fPIC
 KBUILD_CFLAGS_MODULE += -fPIC
 
@@ -56,6 +59,11 @@ endif
 ifeq ($(CONFIG_CMODEL_MEDANY),y)
 	KBUILD_CFLAGS += -mcmodel=medany
 endif
+ifeq ($(CONFIG_MODULE_SECTIONS),y)
+	KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/riscv/kernel/module.lds
+endif
+
+KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
 
 # GCC versions that support the "-mstrict-align" option default to allowing
 # unaligned accesses.  While unaligned accesses are explicitly allowed in the
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 47dacf06c679..bca0eee733b0 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -73,3 +73,5 @@ CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 # CONFIG_RCU_TRACE is not set
 CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 4286a5f83876..1e5fd280fb4d 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += fcntl.h
 generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hash.h
+generic-y += handle_irq.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ioctls.h
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e65d1cd89e28..855115ace98c 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -24,6 +24,20 @@
 #include <asm/barrier.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
+
+#define __atomic_op_acquire(op, args...)				\
+({									\
+	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
+	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory");	\
+	__ret;								\
+})
+
+#define __atomic_op_release(op, args...)				\
+({									\
+	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");	\
+	op##_relaxed(args);						\
+})
+
 static __always_inline int atomic_read(const atomic_t *v)
 {
 	return READ_ONCE(v->counter);
@@ -50,22 +64,23 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
  * have the AQ or RL bits set.  These don't return anything, so there's only
  * one version to worry about.
  */
-#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)				\
-static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
-{											\
-	__asm__ __volatile__ (								\
-		"amo" #asm_op "." #asm_type " zero, %1, %0"				\
-		: "+A" (v->counter)							\
-		: "r" (I)								\
-		: "memory");								\
-}
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
+static __always_inline							\
+void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
+{									\
+	__asm__ __volatile__ (						\
+		"	amo" #asm_op "." #asm_type " zero, %1, %0"	\
+		: "+A" (v->counter)					\
+		: "r" (I)						\
+		: "memory");						\
+}									\
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, I)			\
+#define ATOMIC_OPS(op, asm_op, I)					\
         ATOMIC_OP (op, asm_op, I, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, I)			\
-        ATOMIC_OP (op, asm_op, I, w,  int,   )	\
+#define ATOMIC_OPS(op, asm_op, I)					\
+        ATOMIC_OP (op, asm_op, I, w,  int,   )				\
         ATOMIC_OP (op, asm_op, I, d, long, 64)
 #endif
 
@@ -79,75 +94,115 @@ ATOMIC_OPS(xor, xor,  i)
 #undef ATOMIC_OPS
 
 /*
- * Atomic ops that have ordered, relaxed, acquire, and relese variants.
+ * Atomic ops that have ordered, relaxed, acquire, and release variants.
  * There's two flavors of these: the arithmatic ops have both fetch and return
  * versions, while the logical ops only have fetch versions.
  */
-#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix)				\
-static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v)	\
-{													\
-	register c_type ret;										\
-	__asm__ __volatile__ (										\
-		"amo" #asm_op "." #asm_type #asm_or " %1, %2, %0"					\
-		: "+A" (v->counter), "=r" (ret)								\
-		: "r" (I)										\
-		: "memory");										\
-	return ret;											\
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
+static __always_inline							\
+c_type atomic##prefix##_fetch_##op##_relaxed(c_type i,			\
+					     atomic##prefix##_t *v)	\
+{									\
+	register c_type ret;						\
+	__asm__ __volatile__ (						\
+		"	amo" #asm_op "." #asm_type " %1, %2, %0"	\
+		: "+A" (v->counter), "=r" (ret)				\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
+}									\
+static __always_inline							\
+c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
+{									\
+	register c_type ret;						\
+	__asm__ __volatile__ (						\
+		"	amo" #asm_op "." #asm_type ".aqrl  %1, %2, %0"	\
+		: "+A" (v->counter), "=r" (ret)				\
+		: "r" (I)						\
+		: "memory");						\
+	return ret;							\
 }
 
-#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)			\
-static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, atomic##prefix##_t *v)	\
-{													\
-        return atomic##prefix##_fetch_##op##c_or(i, v) c_op I;						\
+#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
+static __always_inline							\
+c_type atomic##prefix##_##op##_return_relaxed(c_type i,			\
+					      atomic##prefix##_t *v)	\
+{									\
+        return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I;	\
+}									\
+static __always_inline							\
+c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v)	\
+{									\
+        return atomic##prefix##_fetch_##op(i, v) c_op I;		\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, c_op, I)					\
+        ATOMIC_FETCH_OP( op, asm_op,       I, w,  int,   )		\
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_FETCH_OP (op, asm_op,       I, asm_or, c_or, d, long, 64)	\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, c_op, I)					\
+        ATOMIC_FETCH_OP( op, asm_op,       I, w,  int,   )		\
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, w,  int,   )		\
+        ATOMIC_FETCH_OP( op, asm_op,       I, d, long, 64)		\
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64)
 #endif
 
-ATOMIC_OPS(add, add, +,  i,      , _relaxed)
-ATOMIC_OPS(add, add, +,  i, .aq  , _acquire)
-ATOMIC_OPS(add, add, +,  i, .rl  , _release)
-ATOMIC_OPS(add, add, +,  i, .aqrl,         )
+ATOMIC_OPS(add, add, +,  i)
+ATOMIC_OPS(sub, add, +, -i)
+
+#define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+#define atomic_add_return		atomic_add_return
+#define atomic_sub_return		atomic_sub_return
 
-ATOMIC_OPS(sub, add, +, -i,      , _relaxed)
-ATOMIC_OPS(sub, add, +, -i, .aq  , _acquire)
-ATOMIC_OPS(sub, add, +, -i, .rl  , _release)
-ATOMIC_OPS(sub, add, +, -i, .aqrl,         )
+#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
+#define atomic_fetch_add		atomic_fetch_add
+#define atomic_fetch_sub		atomic_fetch_sub
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+#define atomic64_add_return		atomic64_add_return
+#define atomic64_sub_return		atomic64_sub_return
+
+#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+#define atomic64_fetch_add		atomic64_fetch_add
+#define atomic64_fetch_sub		atomic64_fetch_sub
+#endif
 
 #undef ATOMIC_OPS
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )
+#define ATOMIC_OPS(op, asm_op, I)					\
+        ATOMIC_FETCH_OP(op, asm_op, I, w,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or)				\
-        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w,  int,   )	\
-        ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I)					\
+        ATOMIC_FETCH_OP(op, asm_op, I, w,  int,   )			\
+        ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64)
 #endif
 
-ATOMIC_OPS(and, and, i,      , _relaxed)
-ATOMIC_OPS(and, and, i, .aq  , _acquire)
-ATOMIC_OPS(and, and, i, .rl  , _release)
-ATOMIC_OPS(and, and, i, .aqrl,         )
+ATOMIC_OPS(and, and, i)
+ATOMIC_OPS( or,  or, i)
+ATOMIC_OPS(xor, xor, i)
 
-ATOMIC_OPS( or,  or, i,      , _relaxed)
-ATOMIC_OPS( or,  or, i, .aq  , _acquire)
-ATOMIC_OPS( or,  or, i, .rl  , _release)
-ATOMIC_OPS( or,  or, i, .aqrl,         )
+#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
+#define atomic_fetch_and		atomic_fetch_and
+#define atomic_fetch_or			atomic_fetch_or
+#define atomic_fetch_xor		atomic_fetch_xor
 
-ATOMIC_OPS(xor, xor, i,      , _relaxed)
-ATOMIC_OPS(xor, xor, i, .aq  , _acquire)
-ATOMIC_OPS(xor, xor, i, .rl  , _release)
-ATOMIC_OPS(xor, xor, i, .aqrl,         )
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
+#define atomic64_fetch_and		atomic64_fetch_and
+#define atomic64_fetch_or		atomic64_fetch_or
+#define atomic64_fetch_xor		atomic64_fetch_xor
+#endif
 
 #undef ATOMIC_OPS
 
@@ -157,22 +212,24 @@ ATOMIC_OPS(xor, xor, i, .aqrl,         )
 /*
  * The extra atomic operations that are constructed from one of the core
  * AMO-based operations above (aside from sub, which is easier to fit above).
- * These are required to perform a barrier, but they're OK this way because
- * atomic_*_return is also required to perform a barrier.
+ * These are required to perform a full barrier, but they're OK this way
+ * because atomic_*_return is also required to perform a full barrier.
+ *
  */
-#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix)			\
-static __always_inline bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
-{										\
-	return atomic##prefix##_##func_op##_return(i, v) comp_op I;		\
+#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix)		\
+static __always_inline							\
+bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
+{									\
+	return atomic##prefix##_##func_op##_return(i, v) comp_op I;	\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, func_op, comp_op, I)			\
-        ATOMIC_OP (op, func_op, comp_op, I,  int,   )
+#define ATOMIC_OPS(op, func_op, comp_op, I)				\
+        ATOMIC_OP(op, func_op, comp_op, I,  int,   )
 #else
-#define ATOMIC_OPS(op, func_op, comp_op, I)			\
-        ATOMIC_OP (op, func_op, comp_op, I,  int,   )		\
-        ATOMIC_OP (op, func_op, comp_op, I, long, 64)
+#define ATOMIC_OPS(op, func_op, comp_op, I)				\
+        ATOMIC_OP(op, func_op, comp_op, I,  int,   )			\
+        ATOMIC_OP(op, func_op, comp_op, I, long, 64)
 #endif
 
 ATOMIC_OPS(add_and_test, add, ==, 0)
@@ -182,51 +239,87 @@ ATOMIC_OPS(add_negative, add,  <, 0)
 #undef ATOMIC_OP
 #undef ATOMIC_OPS
 
-#define ATOMIC_OP(op, func_op, I, c_type, prefix)				\
-static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v)	\
-{										\
-	atomic##prefix##_##func_op(I, v);					\
+#define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
+static __always_inline							\
+void atomic##prefix##_##op(atomic##prefix##_t *v)			\
+{									\
+	atomic##prefix##_##func_op(I, v);				\
 }
 
-#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)					\
-static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)	\
-{											\
-	return atomic##prefix##_fetch_##func_op(I, v);					\
+#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)			\
+static __always_inline							\
+c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v)	\
+{									\
+	return atomic##prefix##_fetch_##func_op##_relaxed(I, v);	\
+}									\
+static __always_inline							\
+c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)		\
+{									\
+	return atomic##prefix##_fetch_##func_op(I, v);			\
 }
 
-#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix)				\
-static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v)	\
-{											\
-        return atomic##prefix##_fetch_##op(v) c_op I;					\
+#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix)		\
+static __always_inline							\
+c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v)	\
+{									\
+        return atomic##prefix##_fetch_##op##_relaxed(v) c_op I;		\
+}									\
+static __always_inline							\
+c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v)		\
+{									\
+        return atomic##prefix##_fetch_##op(v) c_op I;			\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
+#define ATOMIC_OPS(op, asm_op, c_op, I)					\
+        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
+        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )
 #else
-#define ATOMIC_OPS(op, asm_op, c_op, I)						\
-        ATOMIC_OP       (op, asm_op,       I,  int,   )				\
-        ATOMIC_FETCH_OP (op, asm_op,       I,  int,   )				\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )				\
-        ATOMIC_OP       (op, asm_op,       I, long, 64)				\
-        ATOMIC_FETCH_OP (op, asm_op,       I, long, 64)				\
+#define ATOMIC_OPS(op, asm_op, c_op, I)					\
+        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
+        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
+        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )			\
+        ATOMIC_OP(       op, asm_op,       I, long, 64)			\
+        ATOMIC_FETCH_OP( op, asm_op,       I, long, 64)			\
         ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
 #endif
 
 ATOMIC_OPS(inc, add, +,  1)
 ATOMIC_OPS(dec, add, +, -1)
 
+#define atomic_inc_return_relaxed	atomic_inc_return_relaxed
+#define atomic_dec_return_relaxed	atomic_dec_return_relaxed
+#define atomic_inc_return		atomic_inc_return
+#define atomic_dec_return		atomic_dec_return
+
+#define atomic_fetch_inc_relaxed	atomic_fetch_inc_relaxed
+#define atomic_fetch_dec_relaxed	atomic_fetch_dec_relaxed
+#define atomic_fetch_inc		atomic_fetch_inc
+#define atomic_fetch_dec		atomic_fetch_dec
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define atomic64_inc_return_relaxed	atomic64_inc_return_relaxed
+#define atomic64_dec_return_relaxed	atomic64_dec_return_relaxed
+#define atomic64_inc_return		atomic64_inc_return
+#define atomic64_dec_return		atomic64_dec_return
+
+#define atomic64_fetch_inc_relaxed	atomic64_fetch_inc_relaxed
+#define atomic64_fetch_dec_relaxed	atomic64_fetch_dec_relaxed
+#define atomic64_fetch_inc		atomic64_fetch_inc
+#define atomic64_fetch_dec		atomic64_fetch_dec
+#endif
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-#define ATOMIC_OP(op, func_op, comp_op, I, prefix)				\
-static __always_inline bool atomic##prefix##_##op(atomic##prefix##_t *v)	\
-{										\
-	return atomic##prefix##_##func_op##_return(v) comp_op I;		\
+#define ATOMIC_OP(op, func_op, comp_op, I, prefix)			\
+static __always_inline							\
+bool atomic##prefix##_##op(atomic##prefix##_t *v)			\
+{									\
+	return atomic##prefix##_##func_op##_return(v) comp_op I;	\
 }
 
 ATOMIC_OP(inc_and_test, inc, ==, 0,   )
@@ -238,19 +331,19 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
 
 #undef ATOMIC_OP
 
-/* This is required to provide a barrier on success. */
+/* This is required to provide a full barrier on success. */
 static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
        int prev, rc;
 
 	__asm__ __volatile__ (
-		"0:\n\t"
-		"lr.w.aqrl  %[p],  %[c]\n\t"
-		"beq        %[p],  %[u], 1f\n\t"
-		"add       %[rc],  %[p], %[a]\n\t"
-		"sc.w.aqrl %[rc], %[rc], %[c]\n\t"
-		"bnez      %[rc], 0b\n\t"
-		"1:"
+		"0:	lr.w     %[p],  %[c]\n"
+		"	beq      %[p],  %[u], 1f\n"
+		"	add      %[rc], %[p], %[a]\n"
+		"	sc.w.rl  %[rc], %[rc], %[c]\n"
+		"	bnez     %[rc], 0b\n"
+		"	fence    rw, rw\n"
+		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [a]"r" (a), [u]"r" (u)
 		: "memory");
@@ -263,13 +356,13 @@ static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
        long prev, rc;
 
 	__asm__ __volatile__ (
-		"0:\n\t"
-		"lr.d.aqrl  %[p],  %[c]\n\t"
-		"beq        %[p],  %[u], 1f\n\t"
-		"add       %[rc],  %[p], %[a]\n\t"
-		"sc.d.aqrl %[rc], %[rc], %[c]\n\t"
-		"bnez      %[rc], 0b\n\t"
-		"1:"
+		"0:	lr.d     %[p],  %[c]\n"
+		"	beq      %[p],  %[u], 1f\n"
+		"	add      %[rc], %[p], %[a]\n"
+		"	sc.d.rl  %[rc], %[rc], %[c]\n"
+		"	bnez     %[rc], 0b\n"
+		"	fence    rw, rw\n"
+		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [a]"r" (a), [u]"r" (u)
 		: "memory");
@@ -300,37 +393,63 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
 
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a barrier.
- */
-/*
- * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
- * assigning the same barrier to both the LR and SC operations, but that might
- * not make any sense.  We're waiting on a memory model specification to
- * determine exactly what the right thing to do is here.
+ * {cmp,}xchg and the operations that return, so they need a full barrier.
  */
-#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or)						\
-static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) 	\
-{												\
-	return __cmpxchg(&(v->counter), o, n, size, asm_or, asm_or);				\
-}												\
-static __always_inline c_t atomic##prefix##_xchg##c_or(atomic##prefix##_t *v, c_t n) 		\
-{												\
-	return __xchg(n, &(v->counter), size, asm_or);						\
+#define ATOMIC_OP(c_t, prefix, size)					\
+static __always_inline							\
+c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n)		\
+{									\
+	return __xchg_relaxed(&(v->counter), n, size);			\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n)		\
+{									\
+	return __xchg_acquire(&(v->counter), n, size);			\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n)		\
+{									\
+	return __xchg_release(&(v->counter), n, size);			\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n)			\
+{									\
+	return __xchg(&(v->counter), n, size);				\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v,		\
+				     c_t o, c_t n)			\
+{									\
+	return __cmpxchg_relaxed(&(v->counter), o, n, size);		\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v,		\
+				     c_t o, c_t n)			\
+{									\
+	return __cmpxchg_acquire(&(v->counter), o, n, size);		\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v,		\
+				     c_t o, c_t n)			\
+{									\
+	return __cmpxchg_release(&(v->counter), o, n, size);		\
+}									\
+static __always_inline							\
+c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n)	\
+{									\
+	return __cmpxchg(&(v->counter), o, n, size);			\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(c_or, asm_or)			\
-	ATOMIC_OP( int,   , c_or, 4, asm_or)
+#define ATOMIC_OPS()							\
+	ATOMIC_OP( int,   , 4)
 #else
-#define ATOMIC_OPS(c_or, asm_or)			\
-	ATOMIC_OP( int,   , c_or, 4, asm_or)		\
-	ATOMIC_OP(long, 64, c_or, 8, asm_or)
+#define ATOMIC_OPS()							\
+	ATOMIC_OP( int,   , 4)						\
+	ATOMIC_OP(long, 64, 8)
 #endif
 
-ATOMIC_OPS(        , .aqrl)
-ATOMIC_OPS(_acquire,   .aq)
-ATOMIC_OPS(_release,   .rl)
-ATOMIC_OPS(_relaxed,      )
+ATOMIC_OPS()
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP
@@ -340,13 +459,13 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
        int prev, rc;
 
 	__asm__ __volatile__ (
-		"0:\n\t"
-		"lr.w.aqrl  %[p],  %[c]\n\t"
-		"sub       %[rc],  %[p], %[o]\n\t"
-		"bltz      %[rc],    1f\n\t"
-		"sc.w.aqrl %[rc], %[rc], %[c]\n\t"
-		"bnez      %[rc],    0b\n\t"
-		"1:"
+		"0:	lr.w     %[p],  %[c]\n"
+		"	sub      %[rc], %[p], %[o]\n"
+		"	bltz     %[rc], 1f\n"
+		"	sc.w.rl  %[rc], %[rc], %[c]\n"
+		"	bnez     %[rc], 0b\n"
+		"	fence    rw, rw\n"
+		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [o]"r" (offset)
 		: "memory");
@@ -361,13 +480,13 @@ static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset)
        long prev, rc;
 
 	__asm__ __volatile__ (
-		"0:\n\t"
-		"lr.d.aqrl  %[p],  %[c]\n\t"
-		"sub       %[rc],  %[p], %[o]\n\t"
-		"bltz      %[rc],    1f\n\t"
-		"sc.d.aqrl %[rc], %[rc], %[c]\n\t"
-		"bnez      %[rc],    0b\n\t"
-		"1:"
+		"0:	lr.d     %[p],  %[c]\n"
+		"	sub      %[rc], %[p], %[o]\n"
+		"	bltz     %[rc], 1f\n"
+		"	sc.d.rl  %[rc], %[rc], %[c]\n"
+		"	bnez     %[rc], 0b\n"
+		"	fence    rw, rw\n"
+		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [o]"r" (offset)
 		: "memory");
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 5510366d169a..d4628e4b3a5e 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,6 +38,21 @@
 #define __smp_rmb()	RISCV_FENCE(r,r)
 #define __smp_wmb()	RISCV_FENCE(w,w)
 
+#define __smp_store_release(p, v)					\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	RISCV_FENCE(rw,w);						\
+	WRITE_ONCE(*p, v);						\
+} while (0)
+
+#define __smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	RISCV_FENCE(r,rw);						\
+	___p1;								\
+})
+
 /*
  * This is a very specific barrier: it's currently only used in two places in
  * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index db249dbc7b97..c12833f7b6bd 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -17,45 +17,153 @@
 #include <linux/bug.h>
 
 #include <asm/barrier.h>
+#include <asm/fence.h>
 
-#define __xchg(new, ptr, size, asm_or)				\
-({								\
-	__typeof__(ptr) __ptr = (ptr);				\
-	__typeof__(new) __new = (new);				\
-	__typeof__(*(ptr)) __ret;				\
-	switch (size) {						\
-	case 4:							\
-		__asm__ __volatile__ (				\
-			"amoswap.w" #asm_or " %0, %2, %1"	\
-			: "=r" (__ret), "+A" (*__ptr)		\
-			: "r" (__new)				\
-			: "memory");				\
-		break;						\
-	case 8:							\
-		__asm__ __volatile__ (				\
-			"amoswap.d" #asm_or " %0, %2, %1"	\
-			: "=r" (__ret), "+A" (*__ptr)		\
-			: "r" (__new)				\
-			: "memory");				\
-		break;						\
-	default:						\
-		BUILD_BUG();					\
-	}							\
-	__ret;							\
-})
-
-#define xchg(ptr, x)    (__xchg((x), (ptr), sizeof(*(ptr)), .aqrl))
-
-#define xchg32(ptr, x)				\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);	\
-	xchg((ptr), (x));			\
-})
-
-#define xchg64(ptr, x)				\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
-	xchg((ptr), (x));			\
+#define __xchg_relaxed(ptr, new, size)					\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(new) __new = (new);					\
+	__typeof__(*(ptr)) __ret;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.w %0, %2, %1\n"			\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.d %0, %2, %1\n"			\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define xchg_relaxed(ptr, x)						\
+({									\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
+					    _x_, sizeof(*(ptr)));	\
+})
+
+#define __xchg_acquire(ptr, new, size)					\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(new) __new = (new);					\
+	__typeof__(*(ptr)) __ret;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.w %0, %2, %1\n"			\
+			RISCV_ACQUIRE_BARRIER				\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.d %0, %2, %1\n"			\
+			RISCV_ACQUIRE_BARRIER				\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define xchg_acquire(ptr, x)						\
+({									\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	(__typeof__(*(ptr))) __xchg_acquire((ptr),			\
+					    _x_, sizeof(*(ptr)));	\
+})
+
+#define __xchg_release(ptr, new, size)					\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(new) __new = (new);					\
+	__typeof__(*(ptr)) __ret;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			RISCV_RELEASE_BARRIER				\
+			"	amoswap.w %0, %2, %1\n"			\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			RISCV_RELEASE_BARRIER				\
+			"	amoswap.d %0, %2, %1\n"			\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define xchg_release(ptr, x)						\
+({									\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	(__typeof__(*(ptr))) __xchg_release((ptr),			\
+					    _x_, sizeof(*(ptr)));	\
+})
+
+#define __xchg(ptr, new, size)						\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(new) __new = (new);					\
+	__typeof__(*(ptr)) __ret;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.w.aqrl %0, %2, %1\n"		\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			"	amoswap.d.aqrl %0, %2, %1\n"		\
+			: "=r" (__ret), "+A" (*__ptr)			\
+			: "r" (__new)					\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define xchg(ptr, x)							\
+({									\
+	__typeof__(*(ptr)) _x_ = (x);					\
+	(__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr)));	\
+})
+
+#define xchg32(ptr, x)							\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
+	xchg((ptr), (x));						\
+})
+
+#define xchg64(ptr, x)							\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	xchg((ptr), (x));						\
 })
 
 /*
@@ -63,7 +171,51 @@
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  */
-#define __cmpxchg(ptr, old, new, size, lrb, scb)			\
+#define __cmpxchg_relaxed(ptr, old, new, size)				\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	__typeof__(*(ptr)) __ret;					\
+	register unsigned int __rc;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			"0:	lr.w %0, %2\n"				\
+			"	bne  %0, %z3, 1f\n"			\
+			"	sc.w %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			"0:	lr.d %0, %2\n"				\
+			"	bne %0, %z3, 1f\n"			\
+			"	sc.d %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define cmpxchg_relaxed(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg_relaxed((ptr),			\
+					_o_, _n_, sizeof(*(ptr)));	\
+})
+
+#define __cmpxchg_acquire(ptr, old, new, size)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(ptr)) __old = (old);				\
@@ -73,24 +225,24 @@
 	switch (size) {							\
 	case 4:								\
 		__asm__ __volatile__ (					\
-		"0:"							\
-			"lr.w" #scb " %0, %2\n"				\
-			"bne         %0, %z3, 1f\n"			\
-			"sc.w" #lrb " %1, %z4, %2\n"			\
-			"bnez        %1, 0b\n"				\
-		"1:"							\
+			"0:	lr.w %0, %2\n"				\
+			"	bne  %0, %z3, 1f\n"			\
+			"	sc.w %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			RISCV_ACQUIRE_BARRIER				\
+			"1:\n"						\
 			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
 			: "rJ" (__old), "rJ" (__new)			\
 			: "memory");					\
 		break;							\
 	case 8:								\
 		__asm__ __volatile__ (					\
-		"0:"							\
-			"lr.d" #scb " %0, %2\n"				\
-			"bne         %0, %z3, 1f\n"			\
-			"sc.d" #lrb " %1, %z4, %2\n"			\
-			"bnez        %1, 0b\n"				\
-		"1:"							\
+			"0:	lr.d %0, %2\n"				\
+			"	bne %0, %z3, 1f\n"			\
+			"	sc.d %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			RISCV_ACQUIRE_BARRIER				\
+			"1:\n"						\
 			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
 			: "rJ" (__old), "rJ" (__new)			\
 			: "memory");					\
@@ -101,34 +253,131 @@
 	__ret;								\
 })
 
-#define cmpxchg(ptr, o, n) \
-	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), .aqrl, .aqrl))
+#define cmpxchg_acquire(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg_acquire((ptr),			\
+					_o_, _n_, sizeof(*(ptr)));	\
+})
 
-#define cmpxchg_local(ptr, o, n) \
-	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr)), , ))
+#define __cmpxchg_release(ptr, old, new, size)				\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	__typeof__(*(ptr)) __ret;					\
+	register unsigned int __rc;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			RISCV_RELEASE_BARRIER				\
+			"0:	lr.w %0, %2\n"				\
+			"	bne  %0, %z3, 1f\n"			\
+			"	sc.w %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			RISCV_RELEASE_BARRIER				\
+			"0:	lr.d %0, %2\n"				\
+			"	bne %0, %z3, 1f\n"			\
+			"	sc.d %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
+
+#define cmpxchg_release(ptr, o, n)					\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg_release((ptr),			\
+					_o_, _n_, sizeof(*(ptr)));	\
+})
+
+#define __cmpxchg(ptr, old, new, size)					\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(*(ptr)) __old = (old);				\
+	__typeof__(*(ptr)) __new = (new);				\
+	__typeof__(*(ptr)) __ret;					\
+	register unsigned int __rc;					\
+	switch (size) {							\
+	case 4:								\
+		__asm__ __volatile__ (					\
+			"0:	lr.w %0, %2\n"				\
+			"	bne  %0, %z3, 1f\n"			\
+			"	sc.w.rl %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"	fence rw, rw\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	case 8:								\
+		__asm__ __volatile__ (					\
+			"0:	lr.d %0, %2\n"				\
+			"	bne %0, %z3, 1f\n"			\
+			"	sc.d.rl %1, %z4, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			"	fence rw, rw\n"				\
+			"1:\n"						\
+			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
+			: "rJ" (__old), "rJ" (__new)			\
+			: "memory");					\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+	__ret;								\
+})
 
-#define cmpxchg32(ptr, o, n)			\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);	\
-	cmpxchg((ptr), (o), (n));		\
+#define cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg((ptr),				\
+				       _o_, _n_, sizeof(*(ptr)));	\
 })
 
-#define cmpxchg32_local(ptr, o, n)		\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 4);	\
-	cmpxchg_local((ptr), (o), (n));		\
+#define cmpxchg_local(ptr, o, n)					\
+	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
+
+#define cmpxchg32(ptr, o, n)						\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
+	cmpxchg((ptr), (o), (n));					\
 })
 
-#define cmpxchg64(ptr, o, n)			\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
-	cmpxchg((ptr), (o), (n));		\
+#define cmpxchg32_local(ptr, o, n)					\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
+	cmpxchg_relaxed((ptr), (o), (n))				\
 })
 
-#define cmpxchg64_local(ptr, o, n)		\
-({						\
-	BUILD_BUG_ON(sizeof(*(ptr)) != 8);	\
-	cmpxchg_local((ptr), (o), (n));		\
+#define cmpxchg64(ptr, o, n)						\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg((ptr), (o), (n));					\
+})
+
+#define cmpxchg64_local(ptr, o, n)					\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg_relaxed((ptr), (o), (n));				\
 })
 
 #endif /* _ASM_RISCV_CMPXCHG_H */
diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h
new file mode 100644
index 000000000000..2b443a3a487f
--- /dev/null
+++ b/arch/riscv/include/asm/fence.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_RISCV_FENCE_H
+#define _ASM_RISCV_FENCE_H
+
+#ifdef CONFIG_SMP
+#define RISCV_ACQUIRE_BARRIER		"\tfence r , rw\n"
+#define RISCV_RELEASE_BARRIER		"\tfence rw,  w\n"
+#else
+#define RISCV_ACQUIRE_BARRIER
+#define RISCV_RELEASE_BARRIER
+#endif
+
+#endif	/* _ASM_RISCV_FENCE_H */
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 66d4175eb13e..c6dcc5291f97 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -8,3 +8,59 @@
 #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER)
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#ifndef __ASSEMBLY__
+void _mcount(void);
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * A general call in RISC-V is a pair of insts:
+ * 1) auipc: setting high-20 pc-related bits to ra register
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ *          return address (original pc + 4)
+ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+
+#define MCOUNT_ADDR		((unsigned long)_mcount)
+#define JALR_SIGN_MASK		(0x00000800)
+#define JALR_OFFSET_MASK	(0x00000fff)
+#define AUIPC_OFFSET_MASK	(0xfffff000)
+#define AUIPC_PAD		(0x00001000)
+#define JALR_SHIFT		20
+#define JALR_BASIC		(0x000080e7)
+#define AUIPC_BASIC		(0x00000097)
+#define NOP4			(0x00000013)
+
+#define make_call(caller, callee, call)					\
+do {									\
+	call[0] = to_auipc_insn((unsigned int)((unsigned long)callee -	\
+				(unsigned long)caller));		\
+	call[1] = to_jalr_insn((unsigned int)((unsigned long)callee -	\
+			       (unsigned long)caller));			\
+} while (0)
+
+#define to_jalr_insn(offset)						\
+	(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
+
+#define to_auipc_insn(offset)						\
+	((offset & JALR_SIGN_MASK) ?					\
+	(((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) :	\
+	((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
+
+/*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+ */
+#define MCOUNT_INSN_SIZE 8
+#endif
diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h
new file mode 100644
index 000000000000..349df33808c4
--- /dev/null
+++ b/arch/riscv/include/asm/module.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#ifndef _ASM_RISCV_MODULE_H
+#define _ASM_RISCV_MODULE_H
+
+#include <asm-generic/module.h>
+
+#define MODULE_ARCH_VERMAGIC    "riscv"
+
+u64 module_emit_got_entry(struct module *mod, u64 val);
+u64 module_emit_plt_entry(struct module *mod, u64 val);
+
+#ifdef CONFIG_MODULE_SECTIONS
+struct mod_section {
+	struct elf64_shdr *shdr;
+	int num_entries;
+	int max_entries;
+};
+
+struct mod_arch_specific {
+	struct mod_section got;
+	struct mod_section plt;
+	struct mod_section got_plt;
+};
+
+struct got_entry {
+	u64 symbol_addr;	/* the real variable address */
+};
+
+static inline struct got_entry emit_got_entry(u64 val)
+{
+	return (struct got_entry) {val};
+}
+
+static inline struct got_entry *get_got_entry(u64 val,
+					      const struct mod_section *sec)
+{
+	struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr;
+	int i;
+	for (i = 0; i < sec->num_entries; i++) {
+		if (got[i].symbol_addr == val)
+			return &got[i];
+	}
+	return NULL;
+}
+
+struct plt_entry {
+	/*
+	 * Trampoline code to real target address. The return address
+	 * should be the original (pc+4) before entring plt entry.
+	 */
+	u32 insn_auipc;		/* auipc t0, 0x0                       */
+	u32 insn_ld;		/* ld    t1, 0x10(t0)                  */
+	u32 insn_jr;		/* jr    t1                            */
+};
+
+#define OPC_AUIPC  0x0017
+#define OPC_LD     0x3003
+#define OPC_JALR   0x0067
+#define REG_T0     0x5
+#define REG_T1     0x6
+
+static inline struct plt_entry emit_plt_entry(u64 val, u64 plt, u64 got_plt)
+{
+	/*
+	 * U-Type encoding:
+	 * +------------+----------+----------+
+	 * | imm[31:12] | rd[11:7] | opc[6:0] |
+	 * +------------+----------+----------+
+	 *
+	 * I-Type encoding:
+	 * +------------+------------+--------+----------+----------+
+	 * | imm[31:20] | rs1[19:15] | funct3 | rd[11:7] | opc[6:0] |
+	 * +------------+------------+--------+----------+----------+
+	 *
+	 */
+	u64 offset = got_plt - plt;
+	u32 hi20 = (offset + 0x800) & 0xfffff000;
+	u32 lo12 = (offset - hi20);
+	return (struct plt_entry) {
+		OPC_AUIPC | (REG_T0 << 7) | hi20,
+		OPC_LD | (lo12 << 20) | (REG_T0 << 15) | (REG_T1 << 7),
+		OPC_JALR | (REG_T1 << 15)
+	};
+}
+
+static inline int get_got_plt_idx(u64 val, const struct mod_section *sec)
+{
+	struct got_entry *got_plt = (struct got_entry *)sec->shdr->sh_addr;
+	int i;
+	for (i = 0; i < sec->num_entries; i++) {
+		if (got_plt[i].symbol_addr == val)
+			return i;
+	}
+	return -1;
+}
+
+static inline struct plt_entry *get_plt_entry(u64 val,
+				      const struct mod_section *sec_plt,
+				      const struct mod_section *sec_got_plt)
+{
+	struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr;
+	int got_plt_idx = get_got_plt_idx(val, sec_got_plt);
+	if (got_plt_idx >= 0)
+		return plt + got_plt_idx;
+	else
+		return NULL;
+}
+
+#endif /* CONFIG_MODULE_SECTIONS */
+
+#endif /* _ASM_RISCV_MODULE_H */
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 2fd27e8ef1fd..8eb26d1ede81 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -17,6 +17,7 @@
 
 #include <linux/kernel.h>
 #include <asm/current.h>
+#include <asm/fence.h>
 
 /*
  * Simple spin lock operations.  These provide no fairness guarantees.
@@ -28,10 +29,7 @@
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	__asm__ __volatile__ (
-		"amoswap.w.rl x0, x0, %0"
-		: "=A" (lock->lock)
-		:: "memory");
+	smp_store_release(&lock->lock, 0);
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -39,7 +37,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	int tmp = 1, busy;
 
 	__asm__ __volatile__ (
-		"amoswap.w.aq %0, %2, %1"
+		"	amoswap.w %0, %2, %1\n"
+		RISCV_ACQUIRE_BARRIER
 		: "=r" (busy), "+A" (lock->lock)
 		: "r" (tmp)
 		: "memory");
@@ -68,8 +67,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
 		"1:	lr.w	%1, %0\n"
 		"	bltz	%1, 1b\n"
 		"	addi	%1, %1, 1\n"
-		"	sc.w.aq	%1, %1, %0\n"
+		"	sc.w	%1, %1, %0\n"
 		"	bnez	%1, 1b\n"
+		RISCV_ACQUIRE_BARRIER
 		: "+A" (lock->lock), "=&r" (tmp)
 		:: "memory");
 }
@@ -82,8 +82,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
 		"1:	lr.w	%1, %0\n"
 		"	bnez	%1, 1b\n"
 		"	li	%1, -1\n"
-		"	sc.w.aq	%1, %1, %0\n"
+		"	sc.w	%1, %1, %0\n"
 		"	bnez	%1, 1b\n"
+		RISCV_ACQUIRE_BARRIER
 		: "+A" (lock->lock), "=&r" (tmp)
 		:: "memory");
 }
@@ -96,8 +97,9 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
 		"1:	lr.w	%1, %0\n"
 		"	bltz	%1, 1f\n"
 		"	addi	%1, %1, 1\n"
-		"	sc.w.aq	%1, %1, %0\n"
+		"	sc.w	%1, %1, %0\n"
 		"	bnez	%1, 1b\n"
+		RISCV_ACQUIRE_BARRIER
 		"1:\n"
 		: "+A" (lock->lock), "=&r" (busy)
 		:: "memory");
@@ -113,8 +115,9 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 		"1:	lr.w	%1, %0\n"
 		"	bnez	%1, 1f\n"
 		"	li	%1, -1\n"
-		"	sc.w.aq	%1, %1, %0\n"
+		"	sc.w	%1, %1, %0\n"
 		"	bnez	%1, 1b\n"
+		RISCV_ACQUIRE_BARRIER
 		"1:\n"
 		: "+A" (lock->lock), "=&r" (busy)
 		:: "memory");
@@ -125,7 +128,8 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
 static inline void arch_read_unlock(arch_rwlock_t *lock)
 {
 	__asm__ __volatile__(
-		"amoadd.w.rl x0, %1, %0"
+		RISCV_RELEASE_BARRIER
+		"	amoadd.w x0, %1, %0\n"
 		: "+A" (lock->lock)
 		: "r" (-1)
 		: "memory");
@@ -133,10 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
 
 static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
-	__asm__ __volatile__ (
-		"amoswap.w.rl x0, x0, %0"
-		: "=A" (lock->lock)
-		:: "memory");
+	smp_store_release(&lock->lock, 0);
 }
 
 #endif /* _ASM_RISCV_SPINLOCK_H */
diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index a510edfa8226..5cae4c30cd8e 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -79,5 +79,12 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_TPREL_I		49
 #define R_RISCV_TPREL_S		50
 #define R_RISCV_RELAX		51
+#define R_RISCV_SUB6		52
+#define R_RISCV_SET6		53
+#define R_RISCV_SET8		54
+#define R_RISCV_SET16		55
+#define R_RISCV_SET32		56
+#define R_RISCV_32_PCREL	57
+
 
 #endif /* _UAPI_ASM_ELF_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 196f62ffc428..8586dd96c2f0 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -34,7 +34,9 @@ CFLAGS_setup.o := -mcmodel=medany
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
+obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
+
+obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
 
 clean:
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 56fa592cfa34..9aaf6c986771 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -167,10 +167,9 @@ ENTRY(handle_exception)
 	bge s4, zero, 1f
 
 	/* Handle interrupts */
-	slli a0, s4, 1
-	srli a0, a0, 1
-	move a1, sp /* pt_regs */
-	tail do_IRQ
+	move a0, sp /* pt_regs */
+	REG_L a1, handle_arch_irq
+	jr a1
 1:
 	/* Exceptions run with interrupts enabled */
 	csrs sstatus, SR_SIE
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index d0de68d144cb..1157b6b52d25 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -6,9 +6,126 @@
  */
 
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+static int ftrace_check_current_call(unsigned long hook_pos,
+				     unsigned int *expected)
+{
+	unsigned int replaced[2];
+	unsigned int nops[2] = {NOP4, NOP4};
+
+	/* we expect nops at the hook position */
+	if (!expected)
+		expected = nops;
+
+	/*
+	 * Read the text we want to modify;
+	 * return must be -EFAULT on read error
+	 */
+	if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/*
+	 * Make sure it is what we expect it to be;
+	 * return must be -EINVAL on failed comparison
+	 */
+	if (memcmp(expected, replaced, sizeof(replaced))) {
+		pr_err("%p: expected (%08x %08x) but get (%08x %08x)",
+		       (void *)hook_pos, expected[0], expected[1], replaced[0],
+		       replaced[1]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+				bool enable)
+{
+	unsigned int call[2];
+	unsigned int nops[2] = {NOP4, NOP4};
+	int ret = 0;
+
+	make_call(hook_pos, target, call);
+
+	/* replace the auipc-jalr pair at once */
+	ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
+				 MCOUNT_INSN_SIZE);
+	/* return must be -EPERM on write error */
+	if (ret)
+		return -EPERM;
+
+	smp_mb();
+	flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret = ftrace_check_current_call(rec->ip, NULL);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(rec->ip, addr, call);
+	ret = ftrace_check_current_call(rec->ip, call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, false);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+				       (unsigned long)func, true);
+	if (!ret) {
+		ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+					   (unsigned long)func, true);
+	}
+
+	return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(rec->ip, old_addr, call);
+	ret = ftrace_check_current_call(rec->ip, call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, true);
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
- * Most of this file is copied from arm64.
+ * Most of this function is copied from arm64.
  */
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer)
@@ -34,8 +151,62 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 		return;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, NULL);
+				       frame_pointer, parent);
 	if (err == -EBUSY)
 		return;
 	*parent = return_hooker;
 }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned int call[2];
+	static int init_graph = 1;
+	int ret;
+
+	make_call(&ftrace_graph_call, &ftrace_stub, call);
+
+	/*
+	 * When enabling graph tracer for the first time, ftrace_graph_call
+	 * should contains a call to ftrace_stub.  Once it has been disabled,
+	 * the 8-bytes at the position becomes NOPs.
+	 */
+	if (init_graph) {
+		ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+						call);
+		init_graph = 0;
+	} else {
+		ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+						NULL);
+	}
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+				    (unsigned long)&prepare_ftrace_return, true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
+
+	/*
+	 * This is to make sure that ftrace_enable_ftrace_graph_caller
+	 * did the right thing.
+	 */
+	ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+					call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+				    (unsigned long)&prepare_ftrace_return, false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 328718e8026e..b74cbfbce2d0 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -24,16 +24,3 @@ void __init init_IRQ(void)
 {
 	irqchip_init();
 }
-
-asmlinkage void __irq_entry do_IRQ(unsigned int cause, struct pt_regs *regs)
-{
-#ifdef CONFIG_RISCV_INTC
-	/*
-	 * FIXME: We don't want a direct call to riscv_intc_irq here.  The plan
-	 * is to put an IRQ domain here and let the interrupt controller
-	 * register with that, but I poked around the arm64 code a bit and
-	 * there might be a better way to do it (ie, something fully generic).
-	 */
-	riscv_intc_irq(cause, regs);
-#endif
-}
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
new file mode 100644
index 000000000000..35a6ed76cb8b
--- /dev/null
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm-generic/export.h>
+#include <asm/ftrace.h>
+
+	.text
+
+	.macro SAVE_ABI_STATE
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	addi    sp, sp, -48
+	sd      s0, 32(sp)
+	sd      ra, 40(sp)
+	addi    s0, sp, 48
+	sd      t0, 24(sp)
+	sd      t1, 16(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	sd      t2, 8(sp)
+#endif
+#else
+	addi	sp, sp, -16
+	sd	s0, 0(sp)
+	sd	ra, 8(sp)
+	addi	s0, sp, 16
+#endif
+	.endm
+
+	.macro RESTORE_ABI_STATE
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ld	s0, 32(sp)
+	ld	ra, 40(sp)
+	addi	sp, sp, 48
+#else
+	ld	ra, 8(sp)
+	ld	s0, 0(sp)
+	addi	sp, sp, 16
+#endif
+	.endm
+
+	.macro RESTORE_GRAPH_ARGS
+	ld	a0, 24(sp)
+	ld	a1, 16(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	a2, 8(sp)
+#endif
+	.endm
+
+ENTRY(ftrace_graph_caller)
+	addi	sp, sp, -16
+	sd	s0, 0(sp)
+	sd	ra, 8(sp)
+	addi	s0, sp, 16
+ftrace_graph_call:
+	.global ftrace_graph_call
+	/*
+	 * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
+	 * call below.  Check ftrace_modify_all_code for details.
+	 */
+	call	ftrace_stub
+	ld	ra, 8(sp)
+	ld	s0, 0(sp)
+	addi	sp, sp, 16
+	ret
+ENDPROC(ftrace_graph_caller)
+
+ENTRY(ftrace_caller)
+	/*
+	 * a0: the address in the caller when calling ftrace_caller
+	 * a1: the caller's return address
+	 * a2: the address of global variable function_trace_op
+	 */
+	ld	a1, -8(s0)
+	addi	a0, ra, -MCOUNT_INSN_SIZE
+	la	t5, function_trace_op
+	ld	a2, 0(t5)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/*
+	 * the graph tracer (specifically, prepare_ftrace_return) needs these
+	 * arguments but for now the function tracer occupies the regs, so we
+	 * save them in temporary regs to recover later.
+	 */
+	addi	t0, s0, -8
+	mv	t1, a0
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	t2, -16(s0)
+#endif
+#endif
+
+	SAVE_ABI_STATE
+ftrace_call:
+	.global ftrace_call
+	/*
+	 * For the dynamic ftrace to work, here we should reserve at least
+	 * 8 bytes for a functional auipc-jalr pair.  The following call
+	 * serves this purpose.
+	 *
+	 * Calling ftrace_update_ftrace_func would overwrite the nops below.
+	 * Check ftrace_modify_all_code for details.
+	 */
+	call	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_ARGS
+	call	ftrace_graph_caller
+#endif
+
+	RESTORE_ABI_STATE
+	ret
+ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	.macro SAVE_ALL
+	addi	sp, sp, -(PT_SIZE_ON_STACK+16)
+	sd	s0, (PT_SIZE_ON_STACK)(sp)
+	sd	ra, (PT_SIZE_ON_STACK+8)(sp)
+	addi	s0, sp, (PT_SIZE_ON_STACK+16)
+
+	sd x1,  PT_RA(sp)
+	sd x2,  PT_SP(sp)
+	sd x3,  PT_GP(sp)
+	sd x4,  PT_TP(sp)
+	sd x5,  PT_T0(sp)
+	sd x6,  PT_T1(sp)
+	sd x7,  PT_T2(sp)
+	sd x8,  PT_S0(sp)
+	sd x9,  PT_S1(sp)
+	sd x10, PT_A0(sp)
+	sd x11, PT_A1(sp)
+	sd x12, PT_A2(sp)
+	sd x13, PT_A3(sp)
+	sd x14, PT_A4(sp)
+	sd x15, PT_A5(sp)
+	sd x16, PT_A6(sp)
+	sd x17, PT_A7(sp)
+	sd x18, PT_S2(sp)
+	sd x19, PT_S3(sp)
+	sd x20, PT_S4(sp)
+	sd x21, PT_S5(sp)
+	sd x22, PT_S6(sp)
+	sd x23, PT_S7(sp)
+	sd x24, PT_S8(sp)
+	sd x25, PT_S9(sp)
+	sd x26, PT_S10(sp)
+	sd x27, PT_S11(sp)
+	sd x28, PT_T3(sp)
+	sd x29, PT_T4(sp)
+	sd x30, PT_T5(sp)
+	sd x31, PT_T6(sp)
+	.endm
+
+	.macro RESTORE_ALL
+	ld x1,  PT_RA(sp)
+	ld x2,  PT_SP(sp)
+	ld x3,  PT_GP(sp)
+	ld x4,  PT_TP(sp)
+	ld x5,  PT_T0(sp)
+	ld x6,  PT_T1(sp)
+	ld x7,  PT_T2(sp)
+	ld x8,  PT_S0(sp)
+	ld x9,  PT_S1(sp)
+	ld x10, PT_A0(sp)
+	ld x11, PT_A1(sp)
+	ld x12, PT_A2(sp)
+	ld x13, PT_A3(sp)
+	ld x14, PT_A4(sp)
+	ld x15, PT_A5(sp)
+	ld x16, PT_A6(sp)
+	ld x17, PT_A7(sp)
+	ld x18, PT_S2(sp)
+	ld x19, PT_S3(sp)
+	ld x20, PT_S4(sp)
+	ld x21, PT_S5(sp)
+	ld x22, PT_S6(sp)
+	ld x23, PT_S7(sp)
+	ld x24, PT_S8(sp)
+	ld x25, PT_S9(sp)
+	ld x26, PT_S10(sp)
+	ld x27, PT_S11(sp)
+	ld x28, PT_T3(sp)
+	ld x29, PT_T4(sp)
+	ld x30, PT_T5(sp)
+	ld x31, PT_T6(sp)
+
+	ld	s0, (PT_SIZE_ON_STACK)(sp)
+	ld	ra, (PT_SIZE_ON_STACK+8)(sp)
+	addi	sp, sp, (PT_SIZE_ON_STACK+16)
+	.endm
+
+	.macro RESTORE_GRAPH_REG_ARGS
+	ld	a0, PT_T0(sp)
+	ld	a1, PT_T1(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	a2, PT_T2(sp)
+#endif
+	.endm
+
+/*
+ * Most of the contents are the same as ftrace_caller.
+ */
+ENTRY(ftrace_regs_caller)
+	/*
+	 * a3: the address of all registers in the stack
+	 */
+	ld	a1, -8(s0)
+	addi	a0, ra, -MCOUNT_INSN_SIZE
+	la	t5, function_trace_op
+	ld	a2, 0(t5)
+	addi	a3, sp, -(PT_SIZE_ON_STACK+16)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	addi	t0, s0, -8
+	mv	t1, a0
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	t2, -16(s0)
+#endif
+#endif
+	SAVE_ALL
+
+ftrace_regs_call:
+	.global ftrace_regs_call
+	call	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_REG_ARGS
+	call	ftrace_graph_caller
+#endif
+
+	RESTORE_ALL
+	ret
+ENDPROC(ftrace_regs_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index c46a778627be..ce9bdc57a2a1 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -32,13 +32,13 @@
 	addi	s0, sp, 32
 	.endm
 
-	.macro STORE_ABI_STATE
+	.macro RESTORE_ABI_STATE
 	ld	ra, 8(sp)
 	ld	s0, 0(sp)
 	addi	sp, sp, 16
 	.endm
 
-	.macro STORE_RET_ABI_STATE
+	.macro RESTORE_RET_ABI_STATE
 	ld	ra, 24(sp)
 	ld	s0, 16(sp)
 	ld	a0, 8(sp)
@@ -46,6 +46,10 @@
 	.endm
 
 ENTRY(ftrace_stub)
+#ifdef CONFIG_DYNAMIC_FTRACE
+       .global _mcount
+       .set    _mcount, ftrace_stub
+#endif
 	ret
 ENDPROC(ftrace_stub)
 
@@ -66,15 +70,15 @@ ENTRY(return_to_handler)
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	mv	a0, t6
 #endif
-	la	t0, ftrace_return_to_handler
-	jalr	t0
+	call	ftrace_return_to_handler
 	mv	a1, a0
-	STORE_RET_ABI_STATE
+	RESTORE_RET_ABI_STATE
 	jalr	a1
 ENDPROC(return_to_handler)
 EXPORT_SYMBOL(return_to_handler)
 #endif
 
+#ifndef CONFIG_DYNAMIC_FTRACE
 ENTRY(_mcount)
 	la	t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -104,9 +108,8 @@ do_ftrace_graph_caller:
 	ld	a2, -16(s0)
 #endif
 	SAVE_ABI_STATE
-	la	t0, prepare_ftrace_return
-	jalr	t0
-	STORE_ABI_STATE
+	call	prepare_ftrace_return
+	RESTORE_ABI_STATE
 	ret
 #endif
 
@@ -120,7 +123,8 @@ do_trace:
 
 	SAVE_ABI_STATE
 	jalr	t5
-	STORE_ABI_STATE
+	RESTORE_ABI_STATE
 	ret
 ENDPROC(_mcount)
 EXPORT_SYMBOL(_mcount)
+#endif
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
new file mode 100644
index 000000000000..bbbd26e19bfd
--- /dev/null
+++ b/arch/riscv/kernel/module-sections.c
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * Copyright (C) 2018 Andes Technology Corporation <zong@andestech.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+u64 module_emit_got_entry(struct module *mod, u64 val)
+{
+	struct mod_section *got_sec = &mod->arch.got;
+	int i = got_sec->num_entries;
+	struct got_entry *got = get_got_entry(val, got_sec);
+
+	if (got)
+		return (u64)got;
+
+	/* There is no duplicate entry, create a new one */
+	got = (struct got_entry *)got_sec->shdr->sh_addr;
+	got[i] = emit_got_entry(val);
+
+	got_sec->num_entries++;
+	BUG_ON(got_sec->num_entries > got_sec->max_entries);
+
+	return (u64)&got[i];
+}
+
+u64 module_emit_plt_entry(struct module *mod, u64 val)
+{
+	struct mod_section *got_plt_sec = &mod->arch.got_plt;
+	struct got_entry *got_plt;
+	struct mod_section *plt_sec = &mod->arch.plt;
+	struct plt_entry *plt = get_plt_entry(val, plt_sec, got_plt_sec);
+	int i = plt_sec->num_entries;
+
+	if (plt)
+		return (u64)plt;
+
+	/* There is no duplicate entry, create a new one */
+	got_plt = (struct got_entry *)got_plt_sec->shdr->sh_addr;
+	got_plt[i] = emit_got_entry(val);
+	plt = (struct plt_entry *)plt_sec->shdr->sh_addr;
+	plt[i] = emit_plt_entry(val, (u64)&plt[i], (u64)&got_plt[i]);
+
+	plt_sec->num_entries++;
+	got_plt_sec->num_entries++;
+	BUG_ON(plt_sec->num_entries > plt_sec->max_entries);
+
+	return (u64)&plt[i];
+}
+
+static int is_rela_equal(const Elf64_Rela *x, const Elf64_Rela *y)
+{
+	return x->r_info == y->r_info && x->r_addend == y->r_addend;
+}
+
+static bool duplicate_rela(const Elf64_Rela *rela, int idx)
+{
+	int i;
+	for (i = 0; i < idx; i++) {
+		if (is_rela_equal(&rela[i], &rela[idx]))
+			return true;
+	}
+	return false;
+}
+
+static void count_max_entries(Elf64_Rela *relas, int num,
+			      unsigned int *plts, unsigned int *gots)
+{
+	unsigned int type, i;
+
+	for (i = 0; i < num; i++) {
+		type = ELF64_R_TYPE(relas[i].r_info);
+		if (type == R_RISCV_CALL_PLT) {
+			if (!duplicate_rela(relas, i))
+				(*plts)++;
+		} else if (type == R_RISCV_GOT_HI20) {
+			if (!duplicate_rela(relas, i))
+				(*gots)++;
+		}
+	}
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned int num_plts = 0;
+	unsigned int num_gots = 0;
+	int i;
+
+	/*
+	 * Find the empty .got and .plt sections.
+	 */
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+			mod->arch.plt.shdr = sechdrs + i;
+		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got"))
+			mod->arch.got.shdr = sechdrs + i;
+		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got.plt"))
+			mod->arch.got_plt.shdr = sechdrs + i;
+	}
+
+	if (!mod->arch.plt.shdr) {
+		pr_err("%s: module PLT section(s) missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!mod->arch.got.shdr) {
+		pr_err("%s: module GOT section(s) missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!mod->arch.got_plt.shdr) {
+		pr_err("%s: module GOT.PLT section(s) missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	/* Calculate the maxinum number of entries */
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		Elf64_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset;
+		int num_rela = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+		Elf64_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info;
+
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dst_sec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		count_max_entries(relas, num_rela, &num_plts, &num_gots);
+	}
+
+	mod->arch.plt.shdr->sh_type = SHT_NOBITS;
+	mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry);
+	mod->arch.plt.num_entries = 0;
+	mod->arch.plt.max_entries = num_plts;
+
+	mod->arch.got.shdr->sh_type = SHT_NOBITS;
+	mod->arch.got.shdr->sh_flags = SHF_ALLOC;
+	mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry);
+	mod->arch.got.num_entries = 0;
+	mod->arch.got.max_entries = num_gots;
+
+	mod->arch.got_plt.shdr->sh_type = SHT_NOBITS;
+	mod->arch.got_plt.shdr->sh_flags = SHF_ALLOC;
+	mod->arch.got_plt.shdr->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.got_plt.shdr->sh_size = (num_plts + 1) * sizeof(struct got_entry);
+	mod->arch.got_plt.num_entries = 0;
+	mod->arch.got_plt.max_entries = num_plts;
+	return 0;
+}
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index e0f05034fc21..5dddba301d0a 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -49,6 +49,39 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
 	return 0;
 }
 
+static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location,
+					 Elf_Addr v)
+{
+	s64 offset = (void *)v - (void *)location;
+	u16 imm8 = (offset & 0x100) << (12 - 8);
+	u16 imm7_6 = (offset & 0xc0) >> (6 - 5);
+	u16 imm5 = (offset & 0x20) >> (5 - 2);
+	u16 imm4_3 = (offset & 0x18) << (12 - 5);
+	u16 imm2_1 = (offset & 0x6) << (12 - 10);
+
+	*(u16 *)location = (*(u16 *)location & 0xe383) |
+		    imm8 | imm7_6 | imm5 | imm4_3 | imm2_1;
+	return 0;
+}
+
+static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
+				       Elf_Addr v)
+{
+	s64 offset = (void *)v - (void *)location;
+	u16 imm11 = (offset & 0x800) << (12 - 11);
+	u16 imm10 = (offset & 0x400) >> (10 - 8);
+	u16 imm9_8 = (offset & 0x300) << (12 - 11);
+	u16 imm7 = (offset & 0x80) >> (7 - 6);
+	u16 imm6 = (offset & 0x40) << (12 - 11);
+	u16 imm5 = (offset & 0x20) >> (5 - 2);
+	u16 imm4 = (offset & 0x10) << (12 - 5);
+	u16 imm3_1 = (offset & 0xe) << (12 - 10);
+
+	*(u16 *)location = (*(u16 *)location & 0xe003) |
+		    imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1;
+	return 0;
+}
+
 static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
 					 Elf_Addr v)
 {
@@ -92,6 +125,67 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location,
 	return 0;
 }
 
+static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
+				   Elf_Addr v)
+{
+	s32 hi20;
+
+	if (IS_ENABLED(CMODEL_MEDLOW)) {
+		pr_err(
+		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+		  me->name, v, location);
+		return -EINVAL;
+	}
+
+	hi20 = ((s32)v + 0x800) & 0xfffff000;
+	*location = (*location & 0xfff) | hi20;
+	return 0;
+}
+
+static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location,
+				     Elf_Addr v)
+{
+	/* Skip medlow checking because of filtering by HI20 already */
+	s32 hi20 = ((s32)v + 0x800) & 0xfffff000;
+	s32 lo12 = ((s32)v - hi20);
+	*location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20);
+	return 0;
+}
+
+static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
+				     Elf_Addr v)
+{
+	/* Skip medlow checking because of filtering by HI20 already */
+	s32 hi20 = ((s32)v + 0x800) & 0xfffff000;
+	s32 lo12 = ((s32)v - hi20);
+	u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11);
+	u32 imm4_0 = (lo12 & 0x1f) << (11 - 4);
+	*location = (*location & 0x1fff07f) | imm11_5 | imm4_0;
+	return 0;
+}
+
+static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
+				       Elf_Addr v)
+{
+	s64 offset = (void *)v - (void *)location;
+	s32 hi20;
+
+	/* Always emit the got entry */
+	if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
+		offset = module_emit_got_entry(me, v);
+		offset = (void *)offset - (void *)location;
+	} else {
+		pr_err(
+		  "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n",
+		  me->name, v, location);
+		return -EINVAL;
+	}
+
+	hi20 = (offset + 0x800) & 0xfffff000;
+	*location = (*location & 0xfff) | hi20;
+	return 0;
+}
+
 static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 				       Elf_Addr v)
 {
@@ -100,6 +194,33 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 	u32 hi20, lo12;
 
 	if (offset != fill_v) {
+		/* Only emit the plt entry if offset over 32-bit range */
+		if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
+			offset = module_emit_plt_entry(me, v);
+			offset = (void *)offset - (void *)location;
+		} else {
+			pr_err(
+			  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+			  me->name, v, location);
+			return -EINVAL;
+		}
+	}
+
+	hi20 = (offset + 0x800) & 0xfffff000;
+	lo12 = (offset - hi20) & 0xfff;
+	*location = (*location & 0xfff) | hi20;
+	*(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20);
+	return 0;
+}
+
+static int apply_r_riscv_call_rela(struct module *me, u32 *location,
+				   Elf_Addr v)
+{
+	s64 offset = (void *)v - (void *)location;
+	s32 fill_v = offset;
+	u32 hi20, lo12;
+
+	if (offset != fill_v) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
 		  me->name, v, location);
@@ -119,16 +240,49 @@ static int apply_r_riscv_relax_rela(struct module *me, u32 *location,
 	return 0;
 }
 
+static int apply_r_riscv_align_rela(struct module *me, u32 *location,
+				    Elf_Addr v)
+{
+	pr_err(
+	  "%s: The unexpected relocation type 'R_RISCV_ALIGN' from PC = %p\n",
+	  me->name, location);
+	return -EINVAL;
+}
+
+static int apply_r_riscv_add32_rela(struct module *me, u32 *location,
+				    Elf_Addr v)
+{
+	*(u32 *)location += (*(u32 *)v);
+	return 0;
+}
+
+static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
+				    Elf_Addr v)
+{
+	*(u32 *)location -= (*(u32 *)v);
+	return 0;
+}
+
 static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
 				Elf_Addr v) = {
 	[R_RISCV_64]			= apply_r_riscv_64_rela,
 	[R_RISCV_BRANCH]		= apply_r_riscv_branch_rela,
 	[R_RISCV_JAL]			= apply_r_riscv_jal_rela,
+	[R_RISCV_RVC_BRANCH]		= apply_r_riscv_rcv_branch_rela,
+	[R_RISCV_RVC_JUMP]		= apply_r_riscv_rvc_jump_rela,
 	[R_RISCV_PCREL_HI20]		= apply_r_riscv_pcrel_hi20_rela,
 	[R_RISCV_PCREL_LO12_I]		= apply_r_riscv_pcrel_lo12_i_rela,
 	[R_RISCV_PCREL_LO12_S]		= apply_r_riscv_pcrel_lo12_s_rela,
+	[R_RISCV_HI20]			= apply_r_riscv_hi20_rela,
+	[R_RISCV_LO12_I]		= apply_r_riscv_lo12_i_rela,
+	[R_RISCV_LO12_S]		= apply_r_riscv_lo12_s_rela,
+	[R_RISCV_GOT_HI20]		= apply_r_riscv_got_hi20_rela,
 	[R_RISCV_CALL_PLT]		= apply_r_riscv_call_plt_rela,
+	[R_RISCV_CALL]			= apply_r_riscv_call_rela,
 	[R_RISCV_RELAX]			= apply_r_riscv_relax_rela,
+	[R_RISCV_ALIGN]			= apply_r_riscv_align_rela,
+	[R_RISCV_ADD32]			= apply_r_riscv_add32_rela,
+	[R_RISCV_SUB32]			= apply_r_riscv_sub32_rela,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
@@ -184,25 +338,38 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 				u64 hi20_loc =
 					sechdrs[sechdrs[relsec].sh_info].sh_addr
 					+ rel[j].r_offset;
-				/* Find the corresponding HI20 PC-relative relocation entry */
-				if (hi20_loc == sym->st_value) {
+				u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info);
+
+				/* Find the corresponding HI20 relocation entry */
+				if (hi20_loc == sym->st_value
+				    && (hi20_type == R_RISCV_PCREL_HI20
+					|| hi20_type == R_RISCV_GOT_HI20)) {
+					s32 hi20, lo12;
 					Elf_Sym *hi20_sym =
 						(Elf_Sym *)sechdrs[symindex].sh_addr
 						+ ELF_RISCV_R_SYM(rel[j].r_info);
 					u64 hi20_sym_val =
 						hi20_sym->st_value
 						+ rel[j].r_addend;
+
 					/* Calculate lo12 */
-					s64 offset = hi20_sym_val - hi20_loc;
-					s32 hi20 = (offset + 0x800) & 0xfffff000;
-					s32 lo12 = offset - hi20;
+					u64 offset = hi20_sym_val - hi20_loc;
+					if (IS_ENABLED(CONFIG_MODULE_SECTIONS)
+					    && hi20_type == R_RISCV_GOT_HI20) {
+						offset = module_emit_got_entry(
+							 me, hi20_sym_val);
+						offset = offset - hi20_loc;
+					}
+					hi20 = (offset + 0x800) & 0xfffff000;
+					lo12 = offset - hi20;
 					v = lo12;
+
 					break;
 				}
 			}
 			if (j == sechdrs[relsec].sh_size / sizeof(*rel)) {
 				pr_err(
-				  "%s: Can not find HI20 PC-relative relocation information\n",
+				  "%s: Can not find HI20 relocation information\n",
 				  me->name);
 				return -EINVAL;
 			}
diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds
new file mode 100644
index 000000000000..295ecfb341a2
--- /dev/null
+++ b/arch/riscv/kernel/module.lds
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+SECTIONS {
+	.plt (NOLOAD) : { BYTE(0) }
+	.got (NOLOAD) : { BYTE(0) }
+	.got.plt (NOLOAD) : { BYTE(0) }
+}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 559aae781154..a4b1d94371a0 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -18,6 +18,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/ftrace.h>
 
 #ifdef CONFIG_FRAME_POINTER
 
@@ -63,7 +64,12 @@ static void notrace walk_stackframe(struct task_struct *task,
 		frame = (struct stackframe *)fp - 1;
 		sp = fp;
 		fp = frame->fp;
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+		pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+					   (unsigned long *)(fp - 8));
+#else
 		pc = frame->ra - 0x4;
+#endif
 	}
 }
 
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 12e8484a8ee7..e762ef417562 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -94,23 +94,30 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 
 
 #define	STACK_OFFSET    8*3
-#define	HashKey		16*0	// store HashKey <<1 mod poly here
-#define	HashKey_2	16*1	// store HashKey^2 <<1 mod poly here
-#define	HashKey_3	16*2	// store HashKey^3 <<1 mod poly here
-#define	HashKey_4	16*3	// store HashKey^4 <<1 mod poly here
-#define	HashKey_k	16*4	// store XOR of High 64 bits and Low 64
+
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+#define	HashKey		16*6	// store HashKey <<1 mod poly here
+#define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
+#define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
+#define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
+#define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey <<1 mod poly here
 				//(for Karatsuba purposes)
-#define	HashKey_2_k	16*5	// store XOR of High 64 bits and Low 64
+#define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^2 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_3_k	16*6	// store XOR of High 64 bits and Low 64
+#define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^3 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_4_k	16*7	// store XOR of High 64 bits and Low 64
+#define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^4 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	VARIABLE_OFFSET	16*8
 
 #define arg1 rdi
 #define arg2 rsi
@@ -118,10 +125,11 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define arg4 rcx
 #define arg5 r8
 #define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
+#define arg7 STACK_OFFSET+8(%rsp)
+#define arg8 STACK_OFFSET+16(%rsp)
+#define arg9 STACK_OFFSET+24(%rsp)
+#define arg10 STACK_OFFSET+32(%rsp)
+#define arg11 STACK_OFFSET+40(%rsp)
 #define keysize 2*15*16(%arg1)
 #endif
 
@@ -171,6 +179,332 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define TKEYP	T1
 #endif
 
+.macro FUNC_SAVE
+	push	%r12
+	push	%r13
+	push	%r14
+#
+# states of %xmm registers %xmm6:%xmm15 not saved
+# all %xmm registers are clobbered
+#
+.endm
+
+
+.macro FUNC_RESTORE
+	pop	%r14
+	pop	%r13
+	pop	%r12
+.endm
+
+# Precompute hashkeys.
+# Input: Hash subkey.
+# Output: HashKeys stored in gcm_context_data.  Only needs to be called
+# once per key.
+# clobbers r12, and tmp xmm registers.
+.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
+	mov	\SUBKEY, %r12
+	movdqu	(%r12), \TMP3
+	movdqa	SHUF_MASK(%rip), \TMP2
+	PSHUFB_XMM \TMP2, \TMP3
+
+	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+	movdqa	\TMP3, \TMP2
+	psllq	$1, \TMP3
+	psrlq	$63, \TMP2
+	movdqa	\TMP2, \TMP1
+	pslldq	$8, \TMP2
+	psrldq	$8, \TMP1
+	por	\TMP2, \TMP3
+
+	# reduce HashKey<<1
+
+	pshufd	$0x24, \TMP1, \TMP2
+	pcmpeqd TWOONE(%rip), \TMP2
+	pand	POLY(%rip), \TMP2
+	pxor	\TMP2, \TMP3
+	movdqa	\TMP3, HashKey(%arg2)
+
+	movdqa	   \TMP3, \TMP5
+	pshufd	   $78, \TMP3, \TMP1
+	pxor	   \TMP3, \TMP1
+	movdqa	   \TMP1, HashKey_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_2(%arg2)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_2_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_3(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_3_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_4(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_4_k(%arg2)
+.endm
+
+# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
+# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
+.macro GCM_INIT Iv SUBKEY AAD AADLEN
+	mov \AADLEN, %r11
+	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
+	xor %r11, %r11
+	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
+	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
+	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
+	mov \Iv, %rax
+	movdqu (%rax), %xmm0
+	movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
+
+	movdqa  SHUF_MASK(%rip), %xmm2
+	PSHUFB_XMM %xmm2, %xmm0
+	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
+
+	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+	movdqa HashKey(%arg2), %xmm13
+
+	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
+	%xmm4, %xmm5, %xmm6
+.endm
+
+# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
+# struct has been initialized by GCM_INIT.
+# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
+# Clobbers rax, r10-r13, and xmm0-xmm15
+.macro GCM_ENC_DEC operation
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+	add %arg5, InLen(%arg2)
+
+	xor %r11, %r11 # initialise the data pointer offset as zero
+	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+	sub %r11, %arg5		# sub partial block data used
+	mov %arg5, %r13		# save the number of bytes
+
+	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
+	mov %r13, %r12
+	# Encrypt/Decrypt first few blocks
+
+	and	$(3<<4), %r12
+	jz	_initial_num_blocks_is_0_\@
+	cmp	$(2<<4), %r12
+	jb	_initial_num_blocks_is_1_\@
+	je	_initial_num_blocks_is_2_\@
+_initial_num_blocks_is_3_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+	sub	$48, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_2_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+	sub	$32, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_1_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+	sub	$16, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_0_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+_initial_blocks_\@:
+
+	# Main loop - Encrypt/Decrypt remaining blocks
+
+	cmp	$0, %r13
+	je	_zero_cipher_left_\@
+	sub	$64, %r13
+	je	_four_cipher_left_\@
+_crypt_by_4_\@:
+	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
+	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+	%xmm7, %xmm8, enc
+	add	$64, %r11
+	sub	$64, %r13
+	jne	_crypt_by_4_\@
+_four_cipher_left_\@:
+	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_\@:
+	movdqu %xmm8, AadHash(%arg2)
+	movdqu %xmm0, CurCount(%arg2)
+
+	mov	%arg5, %r13
+	and	$15, %r13			# %r13 = arg5 (mod 16)
+	je	_multiple_of_16_bytes_\@
+
+	mov %r13, PBlockLen(%arg2)
+
+	# Handle the last <16 Byte block separately
+	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
+	movdqu %xmm0, CurCount(%arg2)
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm0
+
+	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
+	movdqu %xmm0, PBlockEncKey(%arg2)
+
+	cmp	$16, %arg5
+	jge _large_enough_update_\@
+
+	lea (%arg4,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+	jmp _data_read_\@
+
+_large_enough_update_\@:
+	sub	$16, %r11
+	add	%r13, %r11
+
+	# receive the last <16 Byte block
+	movdqu	(%arg4, %r11, 1), %xmm1
+
+	sub	%r13, %r11
+	add	$16, %r11
+
+	lea	SHIFT_MASK+16(%rip), %r12
+	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+	# (r13 is the number of bytes in plaintext mod 16)
+	sub	%r13, %r12
+	# get the appropriate shuffle mask
+	movdqu	(%r12), %xmm2
+	# shift right 16-r13 bytes
+	PSHUFB_XMM  %xmm2, %xmm1
+
+_data_read_\@:
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
+
+.ifc \operation, dec
+	movdqa  %xmm1, %xmm2
+.endif
+	pxor	%xmm1, %xmm0            # XOR Encrypt(K, Yn)
+	movdqu	(%r12), %xmm1
+	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
+	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
+.ifc \operation, dec
+	pand    %xmm1, %xmm2
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10 ,%xmm2
+
+	pxor %xmm2, %xmm8
+.else
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10,%xmm0
+
+	pxor	%xmm0, %xmm8
+.endif
+
+	movdqu %xmm8, AadHash(%arg2)
+.ifc \operation, enc
+	# GHASH computation for the last <16 byte block
+	movdqa SHUF_MASK(%rip), %xmm10
+	# shuffle xmm0 back to output as ciphertext
+	PSHUFB_XMM %xmm10, %xmm0
+.endif
+
+	# Output %r13 bytes
+	MOVQ_R64_XMM %xmm0, %rax
+	cmp $8, %r13
+	jle _less_than_8_bytes_left_\@
+	mov %rax, (%arg3 , %r11, 1)
+	add $8, %r11
+	psrldq $8, %xmm0
+	MOVQ_R64_XMM %xmm0, %rax
+	sub $8, %r13
+_less_than_8_bytes_left_\@:
+	mov %al,  (%arg3, %r11, 1)
+	add $1, %r11
+	shr $8, %rax
+	sub $1, %r13
+	jne _less_than_8_bytes_left_\@
+_multiple_of_16_bytes_\@:
+.endm
+
+# GCM_COMPLETE Finishes update of tag of last partial block
+# Output: Authorization Tag (AUTH_TAG)
+# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
+.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+
+	mov PBlockLen(%arg2), %r12
+
+	cmp $0, %r12
+	je _partial_done\@
+
+	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+
+_partial_done\@:
+	mov AadLen(%arg2), %r12  # %r13 = aadLen (number of bytes)
+	shl	$3, %r12		  # convert into number of bits
+	movd	%r12d, %xmm15		  # len(A) in %xmm15
+	mov InLen(%arg2), %r12
+	shl     $3, %r12                  # len(C) in bits (*128)
+	MOVQ_R64_XMM    %r12, %xmm1
+
+	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
+	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
+	pxor	%xmm15, %xmm8
+	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+	# final GHASH computation
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm8
+
+	movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
+	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
+	pxor	%xmm8, %xmm0
+_return_T_\@:
+	mov	\AUTHTAG, %r10                     # %r10 = authTag
+	mov	\AUTHTAGLEN, %r11                    # %r11 = auth_tag_len
+	cmp	$16, %r11
+	je	_T_16_\@
+	cmp	$8, %r11
+	jl	_T_4_\@
+_T_8_\@:
+	MOVQ_R64_XMM	%xmm0, %rax
+	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
+	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_4_\@:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_123_\@:
+	movd	%xmm0, %eax
+	cmp	$2, %r11
+	jl	_T_1_\@
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_\@
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_\@:
+	mov	%al, (%r10)
+	jmp	_return_T_done_\@
+_T_16_\@:
+	movdqu	%xmm0, (%r10)
+_return_T_done_\@:
+.endm
 
 #ifdef __x86_64__
 /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -264,232 +598,188 @@ _read_next_byte_lt8_\@:
 _done_read_partial_block_\@:
 .endm
 
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor       \XMM2, \XMM2
+# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
+# clobbers r10-11, xmm14
+.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
+	TMP6 TMP7
+	MOVADQ	   SHUF_MASK(%rip), %xmm14
+	mov	   \AAD, %r10		# %r10 = AAD
+	mov	   \AADLEN, %r11		# %r11 = aadLen
+	pxor	   \TMP7, \TMP7
+	pxor	   \TMP6, \TMP6
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	jl	   _get_AAD_rest\@
+_get_AAD_blocks\@:
+	movdqu	   (%r10), \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP7, \TMP6
+	GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
 	add	   $16, %r10
 	sub	   $16, %r11
 	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
+	jge	   _get_AAD_blocks\@
 
-	movdqu	   \XMM2, %xmm\i
+	movdqu	   \TMP6, \TMP7
 
 	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
+_get_AAD_rest\@:
 	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
-
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	je	   _get_AAD_done\@
 
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
-	# start AES for num_initial_blocks blocks
-
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP6, \TMP7
+	GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
+	movdqu \TMP7, \TMP6
 
-.if (\i == 5) || (\i == 6) || (\i == 7)
-	MOVADQ		ONE(%RIP),\TMP1
-	MOVADQ		(%arg1),\TMP2
-.irpc index, \i_seq
-	paddd	   \TMP1, \XMM0                 # INCR Y0
-	movdqa	   \XMM0, %xmm\index
-	PSHUFB_XMM   %xmm14, %xmm\index      # perform a 16 byte swap
-	pxor	   \TMP2, %xmm\index
-.endr
-	lea	0x10(%arg1),%r10
-	mov	keysize,%eax
-	shr	$2,%eax				# 128->4, 192->6, 256->8
-	add	$5,%eax			      # 128->9, 192->11, 256->13
-
-aes_loop_initial_dec\num_initial_blocks:
-	MOVADQ	(%r10),\TMP1
-.irpc	index, \i_seq
-	AESENC	\TMP1, %xmm\index
-.endr
-	add	$16,%r10
-	sub	$1,%eax
-	jnz	aes_loop_initial_dec\num_initial_blocks
-
-	MOVADQ	(%r10), \TMP1
-.irpc index, \i_seq
-	AESENCLAST \TMP1, %xmm\index         # Last Round
-.endr
-.irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
-	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
-	# write back plaintext/ciphertext for num_initial_blocks
-	add	   $16, %r11
-
-	movdqa     \TMP1, %xmm\index
-	PSHUFB_XMM	   %xmm14, %xmm\index
-                # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
-
-        # apply GHASH on num_initial_blocks blocks
+_get_AAD_done\@:
+	movdqu \TMP6, AadHash(%arg2)
+.endm
 
-.if \i == 5
-        pxor       %xmm5, %xmm6
-	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
+# between update calls.
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+	AAD_HASH operation
+	mov 	PBlockLen(%arg2), %r13
+	cmp	$0, %r13
+	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	# Read in input data without over reading
+	cmp	$16, \PLAIN_CYPH_LEN
+	jl	_fewer_than_16_bytes_\@
+	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
+	jmp	_data_read_\@
+
+_fewer_than_16_bytes_\@:
+	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+	mov	\PLAIN_CYPH_LEN, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+	mov PBlockLen(%arg2), %r13
+
+_data_read_\@:				# Finished reading in data
+
+	movdqu	PBlockEncKey(%arg2), %xmm9
+	movdqu	HashKey(%arg2), %xmm13
+
+	lea	SHIFT_MASK(%rip), %r12
+
+	# adjust the shuffle mask pointer to be able to shift r13 bytes
+	# r16-r13 is the number of bytes in plaintext mod 16)
+	add	%r13, %r12
+	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
+	PSHUFB_XMM %xmm2, %xmm9		# shift right r13 bytes
+
+.ifc \operation, dec
+	movdqa	%xmm1, %xmm3
+	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_1_\@
+	sub	%r10, %r12
+_no_extra_mask_1_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
+
+	pand	%xmm1, %xmm3
+	movdqa	SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM	%xmm10, %xmm3
+	PSHUFB_XMM	%xmm2, %xmm3
+	pxor	%xmm3, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_1_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_dec_done_\@
+_partial_incomplete_1_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+.else
+	pxor	%xmm1, %xmm9			# Plaintext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_2_\@
+	sub	%r10, %r12
+_no_extra_mask_2_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9
+
+	movdqa	SHUF_MASK(%rip), %xmm1
+	PSHUFB_XMM %xmm1, %xmm9
+	PSHUFB_XMM %xmm2, %xmm9
+	pxor	%xmm9, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_2_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_encode_done_\@
+_partial_incomplete_2_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+
+	movdqa	SHUF_MASK(%rip), %xmm10
+	# shuffle xmm9 back to output as ciphertext
+	PSHUFB_XMM	%xmm10, %xmm9
+	PSHUFB_XMM	%xmm2, %xmm9
 .endif
-	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
-	# no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
-*/
-	MOVADQ	   ONE(%rip), \TMP1
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM1
-	PSHUFB_XMM  %xmm14, \XMM1        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM2
-	PSHUFB_XMM  %xmm14, \XMM2        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM3
-	PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM4
-	PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
-
-	MOVADQ	   0(%arg1),\TMP1
-	pxor	   \TMP1, \XMM1
-	pxor	   \TMP1, \XMM2
-	pxor	   \TMP1, \XMM3
-	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
-	lea	   0xa0(%arg1),%r10
-	mov	   keysize,%eax
-	shr	   $2,%eax			# 128->4, 192->6, 256->8
-	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_dec_done\num_initial_blocks
-
-aes_loop_pre_dec\num_initial_blocks:
-	MOVADQ	   (%r10),\TMP2
-.irpc	index, 1234
-	AESENC	   \TMP2, %xmm\index
-.endr
-	add	   $16,%r10
-	sub	   $1,%eax
-	jnz	   aes_loop_pre_dec\num_initial_blocks
-
-aes_loop_pre_dec_done\num_initial_blocks:
-	MOVADQ	   (%r10), \TMP2
-	AESENCLAST \TMP2, \XMM1
-	AESENCLAST \TMP2, \XMM2
-	AESENCLAST \TMP2, \XMM3
-	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM1
-	movdqu	   \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM2
-	movdqu	   \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM3
-	movdqu	   \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM4
-	movdqu	   \XMM4, 16*3(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM4
-	add	   $64, %r11
-	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-	pxor	   \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
-	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
-_initial_blocks_done\num_initial_blocks\operation:
-
-.endm
+	# output encrypted Bytes
+	cmp	$0, %r10
+	jl	_partial_fill_\@
+	mov	%r13, %r12
+	mov	$16, %r13
+	# Set r13 to be the number of bytes to write out
+	sub	%r12, %r13
+	jmp	_count_set_\@
+_partial_fill_\@:
+	mov	\PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+	movdqa	%xmm9, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	cmp	$8, %r13
+	jle	_less_than_8_bytes_left_\@
 
+	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$8, \DATA_OFFSET
+	psrldq	$8, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	sub	$8, %r13
+_less_than_8_bytes_left_\@:
+	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$1, \DATA_OFFSET
+	shr	$8, %rax
+	sub	$1, %r13
+	jne	_less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
 
 /*
 * if a = number of total plaintext bytes
@@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation:
 * the ciphertext
 * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg2, %arg3 are used as a pointer only, not modified
 */
 
 
-.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor	   \XMM2, \XMM2
+.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	MOVADQ		SHUF_MASK(%rip), %xmm14
 
-	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-	add	   $16, %r10
-	sub	   $16, %r11
-	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
+	movdqu AadHash(%arg2), %xmm\i		    # XMM0 = Y0
 
-	movdqu	   \XMM2, %xmm\i
-
-	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
-
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
 	# start AES for num_initial_blocks blocks
 
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
+	movdqu CurCount(%arg2), \XMM0                # XMM0 = Y0
 
 .if (\i == 5) || (\i == 6) || (\i == 7)
 
@@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation:
 	MOVADQ		0(%arg1),\TMP2
 .irpc index, \i_seq
 	paddd		\TMP1, \XMM0                 # INCR Y0
+.ifc \operation, dec
+        movdqa     \XMM0, %xmm\index
+.else
 	MOVADQ		\XMM0, %xmm\index
+.endif
 	PSHUFB_XMM	%xmm14, %xmm\index      # perform a 16 byte swap
 	pxor		\TMP2, %xmm\index
 .endr
@@ -558,25 +822,29 @@ _get_AAD_done\num_initial_blocks\operation:
 	shr	$2,%eax				# 128->4, 192->6, 256->8
 	add	$5,%eax			      # 128->9, 192->11, 256->13
 
-aes_loop_initial_enc\num_initial_blocks:
+aes_loop_initial_\@:
 	MOVADQ	(%r10),\TMP1
 .irpc	index, \i_seq
 	AESENC	\TMP1, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_initial_enc\num_initial_blocks
+	jnz	aes_loop_initial_\@
 
 	MOVADQ	(%r10), \TMP1
 .irpc index, \i_seq
 	AESENCLAST \TMP1, %xmm\index         # Last Round
 .endr
 .irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
+	movdqu	   (%arg4 , %r11, 1), \TMP1
 	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
+	movdqu	   %xmm\index, (%arg3 , %r11, 1)
 	# write back plaintext/ciphertext for num_initial_blocks
 	add	   $16, %r11
+
+.ifc \operation, dec
+	movdqa     \TMP1, %xmm\index
+.endif
 	PSHUFB_XMM	   %xmm14, %xmm\index
 
 		# prepare plaintext/ciphertext for GHASH computation
@@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks:
 	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 .endif
 	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
+	jl	_initial_blocks_done\@
 	# no need for precomputed values
 /*
 *
@@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks:
 	pxor	   \TMP1, \XMM2
 	pxor	   \TMP1, \XMM3
 	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
 .irpc index, 1234 # do 4 rounds
 	movaps 0x10*\index(%arg1), \TMP1
 	AESENC	   \TMP1, \XMM1
@@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks:
 	AESENC	   \TMP1, \XMM3
 	AESENC	   \TMP1, \XMM4
 .endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
 .irpc index, 56789 # do next 5 rounds
 	movaps 0x10*\index(%arg1), \TMP1
 	AESENC	   \TMP1, \XMM1
@@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks:
 	AESENC	   \TMP1, \XMM3
 	AESENC	   \TMP1, \XMM4
 .endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
 	lea	   0xa0(%arg1),%r10
 	mov	   keysize,%eax
 	shr	   $2,%eax			# 128->4, 192->6, 256->8
 	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_enc_done\num_initial_blocks
+	jz	   aes_loop_pre_done\@
 
-aes_loop_pre_enc\num_initial_blocks:
+aes_loop_pre_\@:
 	MOVADQ	   (%r10),\TMP2
 .irpc	index, 1234
 	AESENC	   \TMP2, %xmm\index
 .endr
 	add	   $16,%r10
 	sub	   $1,%eax
-	jnz	   aes_loop_pre_enc\num_initial_blocks
+	jnz	   aes_loop_pre_\@
 
-aes_loop_pre_enc_done\num_initial_blocks:
+aes_loop_pre_done\@:
 	MOVADQ	   (%r10), \TMP2
 	AESENCLAST \TMP2, \XMM1
 	AESENCLAST \TMP2, \XMM2
 	AESENCLAST \TMP2, \XMM3
 	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
+	movdqu	   16*0(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM1
+.endif
+	movdqu	   16*1(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM2
+.endif
+	movdqu	   16*2(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM3
+.endif
+	movdqu	   16*3(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM4
-	movdqu     \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqu     \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqu     \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqu     \XMM4, 16*3(%arg2 , %r11 , 1)
+.ifc \operation, dec
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM4
+.else
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+.endif
 
 	add	   $64, %r11
 	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
@@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks:
 	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
 	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
 
-_initial_blocks_done\num_initial_blocks\operation:
+_initial_blocks_done\@:
 
 .endm
 
 /*
 * encrypt 4 blocks at a time
 * ghash the 4 previously encrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pshufd	  $78, \XMM5, \TMP6
 	pxor	  \XMM5, \TMP6
 	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
 	movdqa    \XMM0, \XMM1
 	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pxor	  (%arg1), \XMM2
 	pxor	  (%arg1), \XMM3
 	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
 	movaps 0x10(%arg1), \TMP1
 	AESENC	  \TMP1, \XMM1              # Round 1
@@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM6, \TMP1
 	pshufd	  $78, \XMM6, \TMP2
 	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
 	movaps 0x30(%arg1), \TMP3
 	AESENC    \TMP3, \XMM1              # Round 3
@@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x50(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1              # Round 5
@@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM7, \TMP1
 	pshufd	  $78, \XMM7, \TMP2
 	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
         # Multiply TMP5 * HashKey using karatsuba
 
@@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x80(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1             # Round 8
@@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM8, \TMP1
 	pshufd	  $78, \XMM8, \TMP2
 	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
 	movaps 0x90(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1            # Round 9
@@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	  keysize,%eax
 	shr	  $2,%eax			# 128->4, 192->6, 256->8
 	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_enc_done
+	jz	  aes_loop_par_enc_done\@
 
-aes_loop_par_enc:
+aes_loop_par_enc\@:
 	MOVADQ	  (%r10),\TMP3
 .irpc	index, 1234
 	AESENC	  \TMP3, %xmm\index
 .endr
 	add	  $16,%r10
 	sub	  $1,%eax
-	jnz	  aes_loop_par_enc
+	jnz	  aes_loop_par_enc\@
 
-aes_loop_par_enc_done:
+aes_loop_par_enc_done\@:
 	MOVADQ	  (%r10), \TMP3
 	AESENCLAST \TMP3, \XMM1           # Round 10
 	AESENCLAST \TMP3, \XMM2
 	AESENCLAST \TMP3, \XMM3
 	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-        movdqu    \XMM1, (%arg2,%r11,1)        # Write to the ciphertext buffer
-        movdqu    \XMM2, 16(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM3, 32(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM4, 48(%arg2,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM1, (%arg3,%r11,1)        # Write to the ciphertext buffer
+        movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
 	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM3	# perform a 16 byte swap
@@ -925,7 +1187,7 @@ aes_loop_par_enc_done:
 /*
 * decrypt 4 blocks at a time
 * ghash the 4 previously decrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pshufd	  $78, \XMM5, \TMP6
 	pxor	  \XMM5, \TMP6
 	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
 	movdqa    \XMM0, \XMM1
 	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pxor	  (%arg1), \XMM2
 	pxor	  (%arg1), \XMM3
 	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
 	movaps 0x10(%arg1), \TMP1
 	AESENC	  \TMP1, \XMM1              # Round 1
@@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM6, \TMP1
 	pshufd	  $78, \XMM6, \TMP2
 	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
 	movaps 0x30(%arg1), \TMP3
 	AESENC    \TMP3, \XMM1              # Round 3
@@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x50(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1              # Round 5
@@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM7, \TMP1
 	pshufd	  $78, \XMM7, \TMP2
 	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
         # Multiply TMP5 * HashKey using karatsuba
 
@@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x80(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1             # Round 8
@@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM8, \TMP1
 	pshufd	  $78, \XMM8, \TMP2
 	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
 	movaps 0x90(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1            # Round 9
@@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	  keysize,%eax
 	shr	  $2,%eax		        # 128->4, 192->6, 256->8
 	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_dec_done
+	jz	  aes_loop_par_dec_done\@
 
-aes_loop_par_dec:
+aes_loop_par_dec\@:
 	MOVADQ	  (%r10),\TMP3
 .irpc	index, 1234
 	AESENC	  \TMP3, %xmm\index
 .endr
 	add	  $16,%r10
 	sub	  $1,%eax
-	jnz	  aes_loop_par_dec
+	jnz	  aes_loop_par_dec\@
 
-aes_loop_par_dec_done:
+aes_loop_par_dec_done\@:
 	MOVADQ	  (%r10), \TMP3
 	AESENCLAST \TMP3, \XMM1           # last round
 	AESENCLAST \TMP3, \XMM2
 	AESENCLAST \TMP3, \XMM3
 	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM1, (%arg2,%r11,1)        # Write to plaintext buffer
+	movdqu	  \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
 	movdqa    \TMP3, \XMM1
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM2, 16(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM2, 16(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM2
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM3, 32(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM3, 32(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM3
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM4, 48(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM4
 	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
@@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM1, \TMP6
 	pshufd	  $78, \XMM1, \TMP2
 	pxor	  \XMM1, \TMP2
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
-	movdqa	  HashKey_4_k(%rsp), \TMP4
+	movdqa	  HashKey_4_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	movdqa	  \XMM1, \XMMDst
 	movdqa	  \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
@@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM2, \TMP1
 	pshufd	  $78, \XMM2, \TMP2
 	pxor	  \XMM2, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
-	movdqa	  HashKey_3_k(%rsp), \TMP4
+	movdqa	  HashKey_3_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM2, \XMMDst
@@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM3, \TMP1
 	pshufd	  $78, \XMM3, \TMP2
 	pxor	  \XMM3, \TMP2
-	movdqa	  HashKey_2(%rsp), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
-	movdqa	  HashKey_2_k(%rsp), \TMP4
+	movdqa	  HashKey_2_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM3, \XMMDst
@@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM4, \TMP1
 	pshufd	  $78, \XMM4, \TMP2
 	pxor	  \XMM4, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
-	movdqa	  HashKey_k(%rsp), \TMP4
+	movdqa	  HashKey_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM4, \XMMDst
@@ -1256,6 +1518,8 @@ _esb_loop_\@:
 .endm
 /*****************************************************************************
 * void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
+*                   struct gcm_context_data *data
+*                                      // Context data
 *                   u8 *out,           // Plaintext output. Encrypt in-place is allowed.
 *                   const u8 *in,      // Ciphertext input
 *                   u64 plaintext_len, // Length of data in bytes for decryption.
@@ -1333,195 +1597,20 @@ _esb_loop_\@:
 *
 *****************************************************************************/
 ENTRY(aesni_gcm_dec)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-/*
-* states of %xmm registers %xmm6:%xmm15 not saved
-* all %xmm registers are clobbered
-*/
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp                        # align rsp to 64 bytes
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13			  # %xmm13 = HashKey
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # Reduction
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13     # %xmm13 holds the HashKey<<1 (mod poly)
-
-
-        # Decrypt first few blocks
-
-	movdqa %xmm13, HashKey(%rsp)           # store HashKey<<1 (mod poly)
-	mov %arg4, %r13    # save the number of bytes of plaintext/ciphertext
-	and $-16, %r13                      # %r13 = %r13 - (%r13 mod 16)
-	mov %r13, %r12
-	and $(3<<4), %r12
-	jz _initial_num_blocks_is_0_decrypt
-	cmp $(2<<4), %r12
-	jb _initial_num_blocks_is_1_decrypt
-	je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
-	INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
-	sub	$48, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
-	INITIAL_BLOCKS_DEC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
-	sub	$32, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
-	INITIAL_BLOCKS_DEC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
-	sub	$16, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
-	INITIAL_BLOCKS_DEC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
-	cmp	$0, %r13
-	je	_zero_cipher_left_decrypt
-	sub	$64, %r13
-	je	_four_cipher_left_decrypt
-_decrypt_by_4:
-	GHASH_4_ENCRYPT_4_PARALLEL_DEC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_decrypt_by_4
-_four_cipher_left_decrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
-	mov	%arg4, %r13
-	and	$15, %r13				# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_decrypt
-
-        # Handle the last <16 byte block separately
-
-	paddd ONE(%rip), %xmm0         # increment CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	movdqa  %xmm1, %xmm2
-	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu (%r12), %xmm1
-	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
-	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
-	pand    %xmm1, %xmm2
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10 ,%xmm2
-
-	pxor %xmm2, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
-        # output %r13 bytes
-	MOVQ_R64_XMM	%xmm0, %rax
-	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_decrypt
-	mov	%rax, (%arg2 , %r11, 1)
-	add	$8, %r11
-	psrldq	$8, %xmm0
-	MOVQ_R64_XMM	%xmm0, %rax
-	sub	$8, %r13
-_less_than_8_bytes_left_decrypt:
-	mov	%al,  (%arg2, %r11, 1)
-	add	$1, %r11
-	shr	$8, %rax
-	sub	$1, %r13
-	jne	_less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
-	mov	arg8, %r12		  # %r13 = aadLen (number of bytes)
-	shl	$3, %r12		  # convert into number of bits
-	movd	%r12d, %xmm15		  # len(A) in %xmm15
-	shl	$3, %arg4		  # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	         # final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8
+	FUNC_SAVE
 
-	mov	%arg5, %rax		  # %rax = *Y0
-	movdqu	(%rax), %xmm0		  # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_decrypt:
-	mov	arg9, %r10                # %r10 = authTag
-	mov	arg10, %r11               # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_decrypt
-	cmp	$8, %r11
-	jl	_T_4_decrypt
-_T_8_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_4_decrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_123_decrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_decrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_decrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_decrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_decrypt
-_T_16_decrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_decrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC dec
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
 	ret
 ENDPROC(aesni_gcm_dec)
 
 
 /*****************************************************************************
 * void aesni_gcm_enc(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data
+*                                        // Context data
 *                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
 *                    const u8 *in,       // Plaintext input
 *                    u64 plaintext_len,  // Length of data in bytes for encryption.
@@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec)
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-#
-# states of %xmm registers %xmm6:%xmm15 not saved
-# all %xmm registers are clobbered
-#
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # reduce HashKey<<1
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13
-	movdqa	%xmm13, HashKey(%rsp)
-	mov	%arg4, %r13            # %xmm13 holds HashKey<<1 (mod poly)
-	and	$-16, %r13
-	mov	%r13, %r12
+	FUNC_SAVE
 
-        # Encrypt first few blocks
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC enc
 
-	and	$(3<<4), %r12
-	jz	_initial_num_blocks_is_0_encrypt
-	cmp	$(2<<4), %r12
-	jb	_initial_num_blocks_is_1_encrypt
-	je	_initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
-	INITIAL_BLOCKS_ENC	3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
-	sub	$48, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
-	INITIAL_BLOCKS_ENC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
-	sub	$32, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
-	INITIAL_BLOCKS_ENC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
-	sub	$16, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
-	INITIAL_BLOCKS_ENC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
-        # Main loop - Encrypt remaining blocks
-
-	cmp	$0, %r13
-	je	_zero_cipher_left_encrypt
-	sub	$64, %r13
-	je	_four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
-	GHASH_4_ENCRYPT_4_PARALLEL_ENC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
-	mov	%arg4, %r13
-	and	$15, %r13			# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_encrypt
-
-         # Handle the last <16 Byte block separately
-	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
-
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	(%r12), %xmm1
-	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
-	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10,%xmm0
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc)
 
-	pxor	%xmm0, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# GHASH computation for the last <16 byte block
-	movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
+/*****************************************************************************
+* void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                     struct gcm_context_data *data,
+*                                         // context data
+*                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
+*                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+*                                         // concatenated with 0x00000001. 16-byte aligned pointer.
+*                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
+*                     const u8 *aad,      // Additional Authentication Data (AAD)
+*                     u64 aad_len)        // Length of AAD in bytes.
+*/
+ENTRY(aesni_gcm_init)
+	FUNC_SAVE
+	GCM_INIT %arg3, %arg4,%arg5, %arg6
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_init)
 
-	# shuffle xmm0 back to output as ciphertext
+/*****************************************************************************
+* void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
+*                    const u8 *in,       // Plaintext input
+*                    u64 plaintext_len,  // Length of data in bytes for encryption.
+*/
+ENTRY(aesni_gcm_enc_update)
+	FUNC_SAVE
+	GCM_ENC_DEC enc
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc_update)
 
-        # Output %r13 bytes
-	MOVQ_R64_XMM %xmm0, %rax
-	cmp $8, %r13
-	jle _less_than_8_bytes_left_encrypt
-	mov %rax, (%arg2 , %r11, 1)
-	add $8, %r11
-	psrldq $8, %xmm0
-	MOVQ_R64_XMM %xmm0, %rax
-	sub $8, %r13
-_less_than_8_bytes_left_encrypt:
-	mov %al,  (%arg2, %r11, 1)
-	add $1, %r11
-	shr $8, %rax
-	sub $1, %r13
-	jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
-	mov	arg8, %r12    # %r12 = addLen (number of bytes)
-	shl	$3, %r12
-	movd	%r12d, %xmm15       # len(A) in %xmm15
-	shl	$3, %arg4               # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15          # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15       # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8         # perform a 16 byte swap
+/*****************************************************************************
+* void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
+*                    const u8 *in,       // Plaintext input
+*                    u64 plaintext_len,  // Length of data in bytes for encryption.
+*/
+ENTRY(aesni_gcm_dec_update)
+	FUNC_SAVE
+	GCM_ENC_DEC dec
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_dec_update)
 
-	mov	%arg5, %rax		       # %rax  = *Y0
-	movdqu	(%rax), %xmm0		       # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm15         # Encrypt(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_encrypt:
-	mov	arg9, %r10                     # %r10 = authTag
-	mov	arg10, %r11                    # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_encrypt
-	cmp	$8, %r11
-	jl	_T_4_encrypt
-_T_8_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_4_encrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_123_encrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_encrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_encrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_encrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_encrypt
-_T_16_encrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_encrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+/*****************************************************************************
+* void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *auth_tag,       // Authenticated Tag output.
+*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
+*                                        // 12 or 8.
+*/
+ENTRY(aesni_gcm_finalize)
+	FUNC_SAVE
+	GCM_COMPLETE %arg3 %arg4
+	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc)
+ENDPROC(aesni_gcm_finalize)
 
 #endif
 
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 34cf1c1f8c98..acbe7e8336d8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -72,6 +72,21 @@ struct aesni_xts_ctx {
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };
 
+#define GCM_BLOCK_LEN 16
+
+struct gcm_context_data {
+	/* init, update and finalize context data */
+	u8 aad_hash[GCM_BLOCK_LEN];
+	u64 aad_length;
+	u64 in_length;
+	u8 partial_block_enc_key[GCM_BLOCK_LEN];
+	u8 orig_IV[GCM_BLOCK_LEN];
+	u8 current_counter[GCM_BLOCK_LEN];
+	u64 partial_block_len;
+	u64 unused;
+	u8 hash_keys[GCM_BLOCK_LEN * 8];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
 
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * const u8 *in, Plaintext input
  * unsigned long plaintext_len, Length of data in bytes for encryption.
@@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  *          Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 /* asmlinkage void aesni_gcm_dec()
  * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * const u8 *in, Ciphertext input
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
@@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
  * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
  * Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
+/* Scatter / Gather routines, with args similar to above */
+asmlinkage void aesni_gcm_init(void *ctx,
+			       struct gcm_context_data *gdata,
+			       u8 *iv,
+			       u8 *hash_subkey, const u8 *aad,
+			       unsigned long aad_len);
+asmlinkage void aesni_gcm_enc_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in,
+				     unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize(void *ctx,
+				   struct gcm_context_data *gdata,
+				   u8 *auth_tag, unsigned long auth_tag_len);
 
 #ifdef CONFIG_AS_AVX
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
@@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			plaintext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
 	}
 }
 
-static void aesni_gcm_dec_avx(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			ciphertext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx2(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			      plaintext_len, iv, hash_subkey, aad,
+			      aad_len, auth_tag, auth_tag_len);
 	} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
 	}
 }
 
-static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx2(void *ctx,
+	struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
-				aad, aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			      ciphertext_len, iv, hash_subkey,
+			      aad, aad_len, auth_tag, auth_tag_len);
 	} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
 }
 #endif
 
-static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_enc_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long plaintext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
-static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_dec_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long ciphertext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
 	return 0;
 }
 
+static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
+			      unsigned int assoclen, u8 *hash_subkey,
+			      u8 *iv, void *aes_ctx)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
+	struct scatter_walk dst_sg_walk = {};
+	unsigned long left = req->cryptlen;
+	unsigned long len, srclen, dstlen;
+	struct scatter_walk assoc_sg_walk;
+	struct scatter_walk src_sg_walk;
+	struct scatterlist src_start[2];
+	struct scatterlist dst_start[2];
+	struct scatterlist *src_sg;
+	struct scatterlist *dst_sg;
+	u8 *src, *dst, *assoc;
+	u8 *assocmem = NULL;
+	u8 authTag[16];
+
+	if (!enc)
+		left -= auth_tag_len;
+
+	/* Linearize assoc, if not already linear */
+	if (req->src->length >= assoclen && req->src->length &&
+		(!PageHighMem(sg_page(req->src)) ||
+			req->src->offset + req->src->length < PAGE_SIZE)) {
+		scatterwalk_start(&assoc_sg_walk, req->src);
+		assoc = scatterwalk_map(&assoc_sg_walk);
+	} else {
+		/* assoc can be any length, so must be on heap */
+		assocmem = kmalloc(assoclen, GFP_ATOMIC);
+		if (unlikely(!assocmem))
+			return -ENOMEM;
+		assoc = assocmem;
+
+		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+	}
+
+	src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+	scatterwalk_start(&src_sg_walk, src_sg);
+	if (req->src != req->dst) {
+		dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
+		scatterwalk_start(&dst_sg_walk, dst_sg);
+	}
+
+	kernel_fpu_begin();
+	aesni_gcm_init(aes_ctx, &data, iv,
+		hash_subkey, assoc, assoclen);
+	if (req->src != req->dst) {
+		while (left) {
+			src = scatterwalk_map(&src_sg_walk);
+			dst = scatterwalk_map(&dst_sg_walk);
+			srclen = scatterwalk_clamp(&src_sg_walk, left);
+			dstlen = scatterwalk_clamp(&dst_sg_walk, left);
+			len = min(srclen, dstlen);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     dst, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     dst, src, len);
+			}
+			left -= len;
+
+			scatterwalk_unmap(src);
+			scatterwalk_unmap(dst);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_advance(&dst_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 0, left);
+			scatterwalk_done(&dst_sg_walk, 1, left);
+		}
+	} else {
+		while (left) {
+			dst = src = scatterwalk_map(&src_sg_walk);
+			len = scatterwalk_clamp(&src_sg_walk, left);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     src, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     src, src, len);
+			}
+			left -= len;
+			scatterwalk_unmap(src);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 1, left);
+		}
+	}
+	aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+	kernel_fpu_end();
+
+	if (!assocmem)
+		scatterwalk_unmap(assoc);
+	else
+		kfree(assocmem);
+
+	if (!enc) {
+		u8 authTagMsg[16];
+
+		/* Copy out original authTag */
+		scatterwalk_map_and_copy(authTagMsg, req->src,
+					 req->assoclen + req->cryptlen -
+					 auth_tag_len,
+					 auth_tag_len, 0);
+
+		/* Compare generated tag with passed in tag. */
+		return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
+			-EBADMSG : 0;
+	}
+
+	/* Copy in the authTag */
+	scatterwalk_map_and_copy(authTag, req->dst,
+				 req->assoclen + req->cryptlen,
+				 auth_tag_len, 1);
+
+	return 0;
+}
+
 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
@@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
 	    req->src->offset + req->src->length <= PAGE_SIZE) &&
@@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	}
 
 	kernel_fpu_begin();
-	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+	aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
 			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
@@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 	int retval = 0;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
 
 	if (sg_is_last(req->src) &&
@@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 
 
 	kernel_fpu_begin();
-	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
+	aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
 			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index f9eca34301e2..3e0c07cc9124 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,13 +25,13 @@
  *
  */
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/blowfish.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
+				    const u8 *key, unsigned int keylen)
+{
+	return blowfish_setkey(&tfm->base, key, keylen);
+}
+
+static int ecb_crypt(struct skcipher_request *req,
 		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
 		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 		/* Process four block batch */
 		if (nbytes >= bsize * 4) {
@@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+	return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+	return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -245,24 +244,25 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[BF_BLOCK_SIZE];
@@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -332,29 +330,30 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
 }
 
-static struct crypto_alg bf_algs[4] = { {
+static struct crypto_alg bf_cipher_alg = {
 	.cra_name		= "blowfish",
 	.cra_driver_name	= "blowfish-asm",
 	.cra_priority		= 200,
@@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { {
 			.cia_decrypt		= blowfish_decrypt,
 		}
 	}
-}, {
-	.cra_name		= "ecb(blowfish)",
-	.cra_driver_name	= "ecb-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
+};
+
+static struct skcipher_alg bf_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(blowfish)",
+		.base.cra_driver_name	= "ecb-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(blowfish)",
+		.base.cra_driver_name	= "cbc-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(blowfish)",
+		.base.cra_driver_name	= "ctr-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.chunksize		= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
-}, {
-	.cra_name		= "cbc(blowfish)",
-	.cra_driver_name	= "cbc-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(blowfish)",
-	.cra_driver_name	= "ctr-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"blowfish-x86_64: performance on this CPU "
@@ -464,12 +449,23 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	err = crypto_register_alg(&bf_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(bf_skcipher_algs,
+					ARRAY_SIZE(bf_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&bf_cipher_alg);
+
+	return err;
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	crypto_unregister_alg(&bf_cipher_alg);
+	crypto_unregister_skciphers(bf_skcipher_algs,
+				    ARRAY_SIZE(bf_skcipher_algs));
 }
 
 module_init(init);
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 60907c139c4e..d4992e458f92 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -10,18 +10,15 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
@@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
-}
-
-static inline void camellia_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
-}
-
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
-{
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
-}
-
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-xts-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
@@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index d96429da88eb..d09f6521466a 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -10,18 +10,15 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 
@@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static inline void camellia_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
+int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
 }
+EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni",
-	.cra_driver_name	= "__driver-xts-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
@@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index af4840ab2a3d..dcd5e0f71b00 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -23,15 +23,12 @@
  *
  */
 
-#include <asm/processor.h>
 #include <asm/unaligned.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
 }
 EXPORT_SYMBOL_GPL(__camellia_setkey);
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int key_len)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len,
 				 &tfm->crt_flags);
 }
 
+static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int key_len)
+{
+	return camellia_setkey(&tfm->base, key, key_len);
+}
+
 void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 iv = *src;
@@ -1373,188 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_enc_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_dec_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __camellia_setkey(&ctx->camellia_ctx, key,
-				keylen - CAMELLIA_BLOCK_SIZE,
-				&tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table,
-			      key + keylen - CAMELLIA_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
-EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
 
-int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
-EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static struct crypto_alg camellia_algs[6] = { {
+static struct crypto_alg camellia_cipher_alg = {
 	.cra_name		= "camellia",
 	.cra_driver_name	= "camellia-asm",
 	.cra_priority		= 200,
@@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { {
 			.cia_decrypt	 = camellia_decrypt
 		}
 	}
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg camellia_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(camellia)",
+		.base.cra_driver_name	= "ecb-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(camellia)",
+		.base.cra_driver_name	= "cbc-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(camellia)",
+		.base.cra_driver_name	= "ctr-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"camellia-x86_64: performance on this CPU "
@@ -1708,12 +1499,23 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	err = crypto_register_alg(&camellia_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(camellia_skcipher_algs,
+					ARRAY_SIZE(camellia_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&camellia_cipher_alg);
+
+	return err;
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	crypto_unregister_alg(&camellia_cipher_alg);
+	crypto_unregister_skciphers(camellia_skcipher_algs,
+				    ARRAY_SIZE(camellia_skcipher_algs));
 }
 
 module_init(init);
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index dbea6020ffe7..41034745d6a2 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -21,18 +21,14 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast5.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/glue_helper.h>
+#include <crypto/internal/simd.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAST5_PARALLEL_BLOCKS 16
 
@@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
 				__be64 *iv);
 
-static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	return cast5_setkey(&tfm->base, key, keylen);
+}
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
+				   unsigned int nbytes)
 {
 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+			      walk, fpu_enabled, nbytes);
 }
 
 static inline void cast5_fpu_end(bool fpu_enabled)
@@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled)
 	return glue_fpu_end(fpu_enabled);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     bool enc)
+static int ecb_crypt(struct skcipher_request *req, bool enc)
 {
 	bool fpu_enabled = false;
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes;
 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
 	int err;
 
-	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
-
-	err = blkcipher_walk_virt(desc, walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
 
 		/* Process multi-block batch */
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
 			do {
 				fn(ctx, wdst, wsrc);
 
@@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, true);
+	return ecb_crypt(req, true);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, false);
+	return ecb_crypt(req, false);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u64 *src = (u64 *)walk->src.virt.addr;
-	u64 *dst = (u64 *)walk->dst.virt.addr;
-	u64 *iv = (u64 *)walk->iv;
-
-	do {
-		*dst = *src ^ *iv;
-		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u64 *)walk->iv = *iv;
-	return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u64 *src = (u64 *)walk.src.virt.addr;
+		u64 *dst = (u64 *)walk.dst.virt.addr;
+		u64 *iv = (u64 *)walk.iv;
+
+		do {
+			*dst = *src ^ *iv;
+			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u64 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -224,31 +208,29 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	u8 *ctrblk = walk->iv;
 	u8 keystream[CAST5_BLOCK_SIZE];
 	u8 *src = walk->src.virt.addr;
@@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct skcipher_walk *walk,
+				struct cast5_ctx *ctx)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -307,162 +288,80 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __ctr_crypt(&walk, ctx);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 
 	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		ctr_crypt_final(&walk, ctx);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
 }
 
+static struct skcipher_alg cast5_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast5)",
+		.base.cra_driver_name	= "__ecb-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast5)",
+		.base.cra_driver_name	= "__cbc-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast5)",
+		.base.cra_driver_name	= "__ctr-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.chunksize		= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
-static struct crypto_alg cast5_algs[6] = { {
-	.cra_name		= "__ecb-cast5-avx",
-	.cra_driver_name	= "__driver-ecb-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast5-avx",
-	.cra_driver_name	= "__driver-cbc-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast5-avx",
-	.cra_driver_name	= "__driver-ctr-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast5)",
-	.cra_driver_name	= "ecb-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast5)",
-	.cra_driver_name	= "cbc-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast5)",
-	.cra_driver_name	= "ctr-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-} };
+static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
 
 static int __init cast5_init(void)
 {
@@ -474,12 +373,15 @@ static int __init cast5_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	return simd_register_skciphers_compat(cast5_algs,
+					      ARRAY_SIZE(cast5_algs),
+					      cast5_simd_algs);
 }
 
 static void __exit cast5_exit(void)
 {
-	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
+				  cast5_simd_algs);
 }
 
 module_init(cast5_init);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 50e684768c55..9fb66b5e94b2 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -24,19 +24,13 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast6.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
+#include <crypto/internal/simd.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/glue_helper.h>
 
 #define CAST6_PARALLEL_BLOCKS 8
@@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
 asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
 				   const u8 *src, le128 *iv);
 
+static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+				 const u8 *key, unsigned int keylen)
+{
+	return cast6_setkey(&tfm->base, key, keylen);
+}
+
 static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
@@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&cast6_enc, req);
 }
 
-static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_ecb_req_128bit(&cast6_dec, req);
 }
 
-static inline void cast6_fpu_end(bool fpu_enabled)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+					   req);
 }
 
-struct crypt_priv {
-	struct cast6_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct cast6_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct cast6_ctx cast6_ctx;
-};
-
-static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
-			     &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ctr_req_128bit(&cast6_ctr, req);
 }
 
 struct cast6_xts_ctx {
@@ -322,14 +188,14 @@ struct cast6_xts_ctx {
 	struct cast6_ctx crypt_ctx;
 };
 
-static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
+static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			    unsigned int keylen)
 {
-	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
-	err = xts_check_key(tfm, key, keylen);
+	err = xts_verify_key(tfm, key, keylen);
 	if (err)
 		return err;
 
@@ -343,245 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
 			      flags);
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_enc_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_dec_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static struct crypto_alg cast6_algs[10] = { {
-	.cra_name		= "__ecb-cast6-avx",
-	.cra_driver_name	= "__driver-ecb-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast6-avx",
-	.cra_driver_name	= "__driver-cbc-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast6-avx",
-	.cra_driver_name	= "__driver-ctr-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-cast6-avx",
-	.cra_driver_name	= "__driver-lrw-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= lrw_cast6_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-cast6-avx",
-	.cra_driver_name	= "__driver-xts-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= xts_cast6_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast6)",
-	.cra_driver_name	= "ecb-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast6)",
-	.cra_driver_name	= "cbc-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast6)",
-	.cra_driver_name	= "ctr-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(cast6)",
-	.cra_driver_name	= "lrw-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(cast6)",
-	.cra_driver_name	= "xts-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg cast6_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast6)",
+		.base.cra_driver_name	= "__ecb-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast6)",
+		.base.cra_driver_name	= "__cbc-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast6)",
+		.base.cra_driver_name	= "__ctr-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.chunksize		= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(cast6)",
+		.base.cra_driver_name	= "__xts-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAST6_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= xts_cast6_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
 
 static int __init cast6_init(void)
 {
@@ -593,12 +301,15 @@ static int __init cast6_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	return simd_register_skciphers_compat(cast6_algs,
+					      ARRAY_SIZE(cast6_algs),
+					      cast6_simd_algs);
 }
 
 static void __exit cast6_exit(void)
 {
-	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
+				  cast6_simd_algs);
 }
 
 module_init(cast6_init);
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 30c0a37f4882..5c610d4ef9fc 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -20,13 +20,13 @@
  *
  */
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/des.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 struct des3_ede_x86_ctx {
 	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
@@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     const u32 *expkey)
+static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
 {
-	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 		/* Process four block batch */
 		if (nbytes >= bsize * 3) {
@@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->enc_expkey);
+	return ecb_crypt(req, ctx->enc_expkey);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->dec_expkey);
+	return ecb_crypt(req, ctx->dec_expkey);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -250,25 +245,26 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
 static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
-			    struct blkcipher_walk *walk)
+			    struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[DES3_EDE_BLOCK_SIZE];
@@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
+				struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	__be64 *src = (__be64 *)walk->src.virt.addr;
@@ -333,23 +328,24 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
@@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
-static struct crypto_alg des3_ede_algs[4] = { {
+static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
+					const u8 *key,
+					unsigned int keylen)
+{
+	return des3_ede_x86_setkey(&tfm->base, key, keylen);
+}
+
+static struct crypto_alg des3_ede_cipher = {
 	.cra_name		= "des3_ede",
 	.cra_driver_name	= "des3_ede-asm",
 	.cra_priority		= 200,
@@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { {
 			.cia_decrypt		= des3_ede_x86_decrypt,
 		}
 	}
-}, {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "ecb-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "cbc-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(des3_ede)",
-	.cra_driver_name	= "ctr-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg des3_ede_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(des3_ede)",
+		.base.cra_driver_name	= "ctr-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.chunksize		= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init des3_ede_x86_init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	err = crypto_register_alg(&des3_ede_cipher);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(des3_ede_skciphers,
+					ARRAY_SIZE(des3_ede_skciphers));
+	if (err)
+		crypto_unregister_alg(&des3_ede_cipher);
+
+	return err;
 }
 
 static void __exit des3_ede_x86_fini(void)
 {
-	crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	crypto_unregister_alg(&des3_ede_cipher);
+	crypto_unregister_skciphers(des3_ede_skciphers,
+				    ARRAY_SIZE(des3_ede_skciphers));
 }
 
 module_init(des3_ede_x86_init);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index d61e57960fe0..a78ef99a9981 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -29,313 +29,212 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
-static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct blkcipher_walk *walk)
+int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes, i, func_bytes;
+	struct skcipher_walk walk;
 	bool fpu_enabled = false;
+	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes;
+		unsigned int i;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-
+					     &walk, fpu_enabled, nbytes);
 		for (i = 0; i < gctx->num_funcs; i++) {
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
 
-			/* Process multi-block batch */
-			if (nbytes >= func_bytes) {
-				do {
-					gctx->funcs[i].fn_u.ecb(ctx, wdst,
-								wsrc);
+			if (nbytes < func_bytes)
+				continue;
 
-					wsrc += func_bytes;
-					wdst += func_bytes;
-					nbytes -= func_bytes;
-				} while (nbytes >= func_bytes);
+			/* Process multi-block batch */
+			do {
+				gctx->funcs[i].fn_u.ecb(ctx, dst, src);
+				src += func_bytes;
+				dst += func_bytes;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
 
-				if (nbytes < bsize)
-					goto done;
-			}
+			if (nbytes < bsize)
+				break;
 		}
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 	return err;
 }
+EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
 
-int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
-}
-EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
-
-static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-					      struct blkcipher_desc *desc,
-					      struct blkcipher_walk *walk)
-{
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		fn(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u128 *)walk->iv = *iv;
-	return nbytes;
-}
-
-int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		const u128 *src = (u128 *)walk.src.virt.addr;
+		u128 *dst = (u128 *)walk.dst.virt.addr;
+		u128 *iv = (u128 *)walk.iv;
+
+		do {
+			u128_xor(dst, src, iv);
+			fn(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u128 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
-
 	return err;
 }
-EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
 
-static unsigned int
-__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc,
-			  struct blkcipher_walk *walk)
+int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 last_iv;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
 
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
+	while ((nbytes = walk.nbytes)) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		u128 last_iv;
 
-	last_iv = *src;
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
+		/* Start of the last block. */
+		src += nbytes / bsize - 1;
+		dst += nbytes / bsize - 1;
 
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+		last_iv = *src;
 
-		/* Process multi-block batch */
-		if (nbytes >= func_bytes) {
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
+
+			if (nbytes < func_bytes)
+				continue;
+
+			/* Process multi-block batch */
 			do {
-				nbytes -= func_bytes - bsize;
 				src -= num_blocks - 1;
 				dst -= num_blocks - 1;
 
 				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
 
-				nbytes -= bsize;
+				nbytes -= func_bytes;
 				if (nbytes < bsize)
 					goto done;
 
-				u128_xor(dst, dst, src - 1);
-				src -= 1;
-				dst -= 1;
+				u128_xor(dst, dst, --src);
+				dst--;
 			} while (nbytes >= func_bytes);
 		}
-	}
-
 done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
-}
-
-int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u128_xor(dst, dst, (u128 *)walk.iv);
+		*(u128 *)walk.iv = last_iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 	return err;
 }
-EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
 
-static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
-					struct blkcipher_desc *desc,
-					struct blkcipher_walk *walk)
+int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 *src = (u8 *)walk->src.virt.addr;
-	u8 *dst = (u8 *)walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	le128 ctrblk;
-	u128 tmp;
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	const unsigned int bsize = 128 / 8;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	memcpy(&tmp, src, nbytes);
-	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
-	memcpy(dst, &tmp, nbytes);
+	while ((nbytes = walk.nbytes) >= bsize) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		le128 ctrblk;
 
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-}
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
 
-static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	le128 ctrblk;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
 
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+			if (nbytes < func_bytes)
+				continue;
 
-		if (nbytes >= func_bytes) {
+			/* Process multi-block batch */
 			do {
 				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
-
 				src += num_blocks;
 				dst += num_blocks;
 				nbytes -= func_bytes;
 			} while (nbytes >= func_bytes);
 
 			if (nbytes < bsize)
-				goto done;
+				break;
 		}
-	}
-
-done:
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
-}
-
-int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, bsize);
 
-	while ((nbytes = walk.nbytes) >= bsize) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 
-	if (walk.nbytes) {
-		glue_ctr_crypt_final_128bit(
-			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
-
-static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-					    void *ctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
-
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
-
-		if (nbytes >= func_bytes) {
-			do {
-				gctx->funcs[i].fn_u.xts(ctx, dst, src,
-							(le128 *)walk->iv);
+	if (nbytes) {
+		le128 ctrblk;
+		u128 tmp;
 
-				src += num_blocks;
-				dst += num_blocks;
-				nbytes -= func_bytes;
-			} while (nbytes >= func_bytes);
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
+		memcpy(&tmp, walk.src.virt.addr, nbytes);
+		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+							  &ctrblk);
+		memcpy(walk.dst.virt.addr, &tmp, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
 
-			if (nbytes < bsize)
-				goto done;
-		}
+		err = skcipher_walk_done(&walk, 0);
 	}
 
-done:
-	return nbytes;
+	return err;
 }
+EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
 
 static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 					  void *ctx,
@@ -372,46 +271,6 @@ done:
 	return nbytes;
 }
 
-/* for implementations implementing faster XTS IV generator */
-int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes,
-			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
-			  void *tweak_ctx, void *crypt_ctx)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     desc, fpu_enabled,
-				     nbytes < bsize ? bsize : nbytes);
-
-	/* calculate first value of T */
-	tweak_fn(tweak_ctx, walk.iv, walk.iv);
-
-	while (nbytes) {
-		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-	}
-
-	glue_fpu_end(fpu_enabled);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
-
 int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			struct skcipher_request *req,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
@@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 		return err;
 
 	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					    &walk, fpu_enabled,
-					    nbytes < bsize ? bsize : nbytes);
+	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+				     &walk, fpu_enabled,
+				     nbytes < bsize ? bsize : nbytes);
 
 	/* calculate first value of T */
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 870f6d812a2d..03347b16ac9d 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -14,15 +14,12 @@
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
+#include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 
@@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
 				      const u8 *src, le128 *iv);
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = 8,
@@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				       dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	/* since reusing AVX functions, starts using FPU at 8 parallel blocks */
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg srp_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx2",
-	.cra_driver_name	= "__driver-ecb-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[0].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx2",
-	.cra_driver_name	= "__driver-cbc-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[1].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx2",
-	.cra_driver_name	= "__driver-ctr-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[2].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx2",
-	.cra_driver_name	= "__driver-lrw-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[3].cra_list),
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx2",
-	.cra_driver_name	= "__driver-xts-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[4].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[5].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[6].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[7].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[8].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[9].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init init(void)
 {
@@ -548,12 +261,15 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(init);
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6f778d3daa22..458567ecf76c 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -24,21 +24,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 /* 8-way parallel cipher functions */
 asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
@@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
+int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+		       unsigned int keylen)
+{
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+EXPORT_SYMBOL_GPL(xts_serpent_setkey);
 
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 2,
@@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
-
-int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_serpent_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx",
-	.cra_driver_name	= "__driver-ecb-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx",
-	.cra_driver_name	= "__driver-cbc-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx",
-	.cra_driver_name	= "__driver-ctr-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx",
-	.cra_driver_name	= "__driver-lrw-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx",
-	.cra_driver_name	= "__driver-xts-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_init(void)
 {
@@ -598,12 +307,15 @@ static int __init serpent_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit serpent_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(serpent_init);
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index ac0e831943f5..3dafe137596a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -30,21 +30,22 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
-#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
 #include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
+#include <crypto/serpent.h>
 #include <asm/crypto/serpent-sse2.h>
 #include <asm/crypto/glue_helper.h>
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
@@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static inline void serpent_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
-
-static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-
-struct serpent_xts_ctx {
-	struct serpent_ctx tweak_ctx;
-	struct serpent_ctx crypt_ctx;
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	},
 };
 
-static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-sse2",
-	.cra_driver_name	= "__driver-ecb-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-sse2",
-	.cra_driver_name	= "__driver-cbc-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-sse2",
-	.cra_driver_name	= "__driver-ctr-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-sse2",
-	.cra_driver_name	= "__driver-lrw-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-sse2",
-	.cra_driver_name	= "__driver-xts-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-} };
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_sse2_init(void)
 {
@@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit serpent_sse2_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
index acf9fdf01671..e17655ffde79 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
 						(struct sha1_mb_mgr *state);
 
-static inline void sha1_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
-					SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 					  uint32_t len,
 					  int flags)
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 	}
@@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 		return ctx;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 	}
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha1_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * submit
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
index 13590ccf965c..9454bd16f9f8 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
@@ -57,11 +57,9 @@
 #include "sha1_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
index 7926a226b120..4c46ac1b6653 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
 static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
 			(struct sha256_mb_mgr *state);
 
-inline void sha256_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = {
-				SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7};
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 					  uint32_t len,
 					  int flags)
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/* User should not pass anything other than FIRST, UPDATE
-		 * or LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 	}
@@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 		return ctx;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 	}
 
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha256_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/* If we made it here, there was no error during this call to submit */
 	ctx->error = HASH_CTX_ERROR_NONE;
 
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
index aabb30320af0..7c432543dc7f 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
@@ -57,11 +57,9 @@
 #include "sha256_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index 458409b7568d..39e2bbdc1836 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
 static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
 						(struct sha512_mb_mgr *state);
 
-inline void sha512_init_digest(uint64_t *digest)
-{
-	static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
-					SHA512_H0, SHA512_H1, SHA512_H2,
-					SHA512_H3, SHA512_H4, SHA512_H5,
-					SHA512_H6, SHA512_H7 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -263,11 +254,8 @@ static struct sha512_hash_ctx
 
 	mgr = cstate->mgr;
 	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		goto unlock;
 	}
@@ -278,24 +266,12 @@ static struct sha512_hash_ctx
 		goto unlock;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		goto unlock;
 	}
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha512_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * submit
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
index e4653f5eec3f..e5c465bd821e 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
@@ -57,11 +57,9 @@
 #include "sha512_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE            0x04
-#define HASH_FINAL           0x08
+#define HASH_LAST            0x01
+#define HASH_DONE            0x02
+#define HASH_FINAL           0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b7a3904b953c..66d989230d10 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -24,24 +24,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/twofish.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/twofish.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
+#include <asm/crypto/twofish.h>
 
 #define TWOFISH_PARALLEL_BLOCKS 8
 
@@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
 asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);
 
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 				  GLUE_FUNC_CAST(twofish_dec_blk));
 }
 
+struct twofish_xts_ctx {
+	struct twofish_ctx tweak_ctx;
+	struct twofish_ctx crypt_ctx;
+};
+
+static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
+}
 
 static const struct common_glue_ctx twofish_enc = {
 	.num_funcs = 3,
@@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 
-static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
-			      fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 
-static inline void twofish_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 
-struct crypt_priv {
-	struct twofish_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_enc_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
-
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_dec_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg twofish_algs[10] = { {
-	.cra_name		= "__ecb-twofish-avx",
-	.cra_driver_name	= "__driver-ecb-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-twofish-avx",
-	.cra_driver_name	= "__driver-cbc-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-twofish-avx",
-	.cra_driver_name	= "__driver-ctr-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-twofish-avx",
-	.cra_driver_name	= "__driver-lrw-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-twofish-avx",
-	.cra_driver_name	= "__driver-xts-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg twofish_algs[] = {
+	{
+		.base.cra_name		= "__ecb(twofish)",
+		.base.cra_driver_name	= "__ecb-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(twofish)",
+		.base.cra_driver_name	= "__cbc-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(twofish)",
+		.base.cra_driver_name	= "__ctr-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(twofish)",
+		.base.cra_driver_name	= "__xts-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * TF_MIN_KEY_SIZE,
+		.max_keysize		= 2 * TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= xts_twofish_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
 
 static int __init twofish_init(void)
 {
@@ -563,12 +309,15 @@ static int __init twofish_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	return simd_register_skciphers_compat(twofish_algs,
+					      ARRAY_SIZE(twofish_algs),
+					      twofish_simd_algs);
 }
 
 static void __exit twofish_exit(void)
 {
-	crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
+				  twofish_simd_algs);
 }
 
 module_init(twofish_init);
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 243e90a4b5d9..571485502ec8 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -20,22 +20,26 @@
  *
  */
 
-#include <asm/processor.h>
+#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/twofish.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/twofish.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-#include <asm/crypto/twofish.h>
-#include <asm/crypto/glue_helper.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 
 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
 
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_enc_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_dec_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
-			       &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
-EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 
-void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
-
-int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
-}
-EXPORT_SYMBOL_GPL(xts_twofish_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 
-static struct crypto_alg tf_algs[5] = { {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
+static struct skcipher_alg tf_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(twofish)",
+		.base.cra_driver_name	= "ecb-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(twofish)",
+		.base.cra_driver_name	= "cbc-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(twofish)",
+		.base.cra_driver_name	= "ctr-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
-} };
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -478,12 +272,13 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	return crypto_register_skciphers(tf_skciphers,
+					 ARRAY_SIZE(tf_skciphers));
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
 }
 
 module_init(init);
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index 10f8d590bcfe..a5d86fc0593f 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -2,8 +2,9 @@
 #ifndef ASM_X86_CAMELLIA_H
 #define ASM_X86_CAMELLIA_H
 
-#include <linux/kernel.h>
+#include <crypto/b128ops.h>
 #include <linux/crypto.h>
+#include <linux/kernel.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -11,16 +12,13 @@
 #define CAMELLIA_TABLE_BYTE_LEN	272
 #define CAMELLIA_PARALLEL_BLOCKS 2
 
+struct crypto_skcipher;
+
 struct camellia_ctx {
 	u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
 	u32 key_length;
 };
 
-struct camellia_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct camellia_ctx camellia_ctx;
-};
-
 struct camellia_xts_ctx {
 	struct camellia_ctx tweak_ctx;
 	struct camellia_ctx crypt_ctx;
@@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx,
 			     const unsigned char *key,
 			     unsigned int key_len, u32 *flags);
 
-extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			       unsigned int keylen);
-extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			       unsigned int keylen);
 
 /* regular block cipher functions */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 553a03de55c3..d1818634ae7e 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -45,7 +45,7 @@ struct common_glue_ctx {
 };
 
 static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
-				  struct blkcipher_desc *desc,
+				  struct skcipher_walk *walk,
 				  bool fpu_enabled, unsigned int nbytes)
 {
 	if (likely(fpu_blocks_limit < 0))
@@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
 		return false;
 
-	if (desc) {
-		/* prevent sleeping if FPU is in use */
-		desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	}
-
-	kernel_fpu_begin();
-	return true;
-}
-
-static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
-					 int fpu_blocks_limit,
-					 struct skcipher_walk *walk,
-					 bool fpu_enabled, unsigned int nbytes)
-{
-	if (likely(fpu_blocks_limit < 0))
-		return false;
-
-	if (fpu_enabled)
-		return true;
-
-	/*
-	 * Vector-registers are only used when chunk to be processed is large
-	 * enough, so do not enable FPU until it is necessary.
-	 */
-	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
-		return false;
-
 	/* prevent sleeping if FPU is in use */
 	skcipher_walk_atomise(walk);
 
@@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i)
 	i->b = cpu_to_le64(b);
 }
 
-extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
+extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
+
+extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				       struct skcipher_request *req);
+
+extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				       struct skcipher_request *req);
+
+extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
 
 extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			       struct skcipher_request *req,
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index c958b7bd0fcb..db7c9cc32234 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -2,15 +2,13 @@
 #ifndef ASM_X86_SERPENT_AVX_H
 #define ASM_X86_SERPENT_AVX_H
 
-#include <linux/crypto.h>
+#include <crypto/b128ops.h>
 #include <crypto/serpent.h>
+#include <linux/types.h>
 
-#define SERPENT_PARALLEL_BLOCKS 8
+struct crypto_skcipher;
 
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
+#define SERPENT_PARALLEL_BLOCKS 8
 
 struct serpent_xts_ctx {
 	struct serpent_ctx tweak_ctx;
@@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 
-extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int keylen);
 
 #endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index 65bb80adba3e..f618bf272b90 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -4,19 +4,8 @@
 
 #include <linux/crypto.h>
 #include <crypto/twofish.h>
-#include <crypto/lrw.h>
 #include <crypto/b128ops.h>
 
-struct twofish_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct twofish_ctx twofish_ctx;
-};
-
-struct twofish_xts_ctx {
-	struct twofish_ctx tweak_ctx;
-	struct twofish_ctx crypt_ctx;
-};
-
 /* regular block cipher functions from twofish_x86_64 module */
 asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
 				const u8 *src);
@@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
 				     le128 *iv);
 
-extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
 #endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 838a4dc90a6d..df92605d8724 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void)
 	 * new fields are handled in copy_siginfo_to_user32()!
 	 */
 	BUILD_BUG_ON(NSIGILL  != 11);
-	BUILD_BUG_ON(NSIGFPE  != 13);
+	BUILD_BUG_ON(NSIGFPE  != 14);
 	BUILD_BUG_ON(NSIGSEGV != 7);
 	BUILD_BUG_ON(NSIGBUS  != 5);
 	BUILD_BUG_ON(NSIGTRAP != 4);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b75264b09a46..c0dabed5122e 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -245,10 +245,6 @@ config CRYPTO_TEST
 	help
 	  Quick & dirty crypto test module.
 
-config CRYPTO_ABLK_HELPER
-	tristate
-	select CRYPTO_CRYPTD
-
 config CRYPTO_SIMD
 	tristate
 	select CRYPTO_CRYPTD
@@ -324,6 +320,14 @@ config CRYPTO_CBC
 	  CBC: Cipher Block Chaining mode
 	  This block cipher algorithm is required for IPSec.
 
+config CRYPTO_CFB
+	tristate "CFB support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  CFB: Cipher FeedBack mode
+	  This block cipher algorithm is required for TPM2 Cryptography.
+
 config CRYPTO_CTR
 	tristate "CTR support"
 	select CRYPTO_BLKCIPHER
@@ -1114,7 +1118,7 @@ config CRYPTO_BLOWFISH_COMMON
 config CRYPTO_BLOWFISH_X86_64
 	tristate "Blowfish cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_BLOWFISH_COMMON
 	help
 	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
@@ -1145,10 +1149,8 @@ config CRYPTO_CAMELLIA_X86_64
 	tristate "Camellia cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64).
 
@@ -1164,12 +1166,10 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAMELLIA_X86_64
-	select CRYPTO_LRW
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX).
@@ -1186,14 +1186,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAMELLIA_X86_64
 	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
 
@@ -1238,11 +1231,10 @@ config CRYPTO_CAST5
 config CRYPTO_CAST5_AVX_X86_64
 	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST5
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_SIMD
 	help
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
@@ -1261,13 +1253,11 @@ config CRYPTO_CAST6
 config CRYPTO_CAST6_AVX_X86_64
 	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST6
-	select CRYPTO_LRW
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
@@ -1294,7 +1284,7 @@ config CRYPTO_DES_SPARC64
 config CRYPTO_DES3_EDE_X86_64
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
@@ -1422,13 +1412,10 @@ config CRYPTO_SERPENT
 config CRYPTO_SERPENT_SSE2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/SSE2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1444,13 +1431,10 @@ config CRYPTO_SERPENT_SSE2_X86_64
 config CRYPTO_SERPENT_SSE2_586
 	tristate "Serpent cipher algorithm (i586/SSE2)"
 	depends on X86 && !64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1466,12 +1450,10 @@ config CRYPTO_SERPENT_SSE2_586
 config CRYPTO_SERPENT_AVX_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
@@ -1488,14 +1470,7 @@ config CRYPTO_SERPENT_AVX_X86_64
 config CRYPTO_SERPENT_AVX2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_SERPENT
 	select CRYPTO_SERPENT_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1508,6 +1483,45 @@ config CRYPTO_SERPENT_AVX2_X86_64
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
+config CRYPTO_SM4
+	tristate "SM4 cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  SM4 cipher algorithms (OSCCA GB/T 32907-2016).
+
+	  SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+	  Organization of State Commercial Administration of China (OSCCA)
+	  as an authorized cryptographic algorithms for the use within China.
+
+	  SMS4 was originally created for use in protecting wireless
+	  networks, and is mandated in the Chinese National Standard for
+	  Wireless LAN WAPI (Wired Authentication and Privacy Infrastructure)
+	  (GB.15629.11-2003).
+
+	  The latest SM4 standard (GBT.32907-2016) was proposed by OSCCA and
+	  standardized through TC 260 of the Standardization Administration
+	  of the People's Republic of China (SAC).
+
+	  The input, output, and key of SMS4 are each 128 bits.
+
+	  See also: <https://eprint.iacr.org/2008/329.pdf>
+
+	  If unsure, say N.
+
+config CRYPTO_SPECK
+	tristate "Speck cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  Speck is a lightweight block cipher that is tuned for optimal
+	  performance in software (rather than hardware).
+
+	  Speck may not be as secure as AES, and should only be used on systems
+	  where AES is not fast enough.
+
+	  See also: <https://eprint.iacr.org/2013/404.pdf>
+
+	  If unsure, say N.
+
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1581,12 +1595,10 @@ config CRYPTO_TWOFISH_X86_64
 config CRYPTO_TWOFISH_X86_64_3WAY
 	tristate "Twofish cipher algorithm (x86_64, 3-way parallel)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64, 3-way parallel).
 
@@ -1604,15 +1616,12 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 config CRYPTO_TWOFISH_AVX_X86_64
 	tristate "Twofish cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_TWOFISH_X86_64_3WAY
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64/AVX).
 
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35510..4fc69fe94e6a 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+obj-$(CONFIG_CRYPTO_CFB) += cfb.o
 obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
+obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
@@ -110,6 +112,7 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
+obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -149,6 +152,5 @@ obj-$(CONFIG_XOR_BLOCKS) += xor.o
 obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
-obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
 crypto_simd-y := simd.o
 obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
deleted file mode 100644
index 09776bb1360e..000000000000
--- a/crypto/ablk_helper.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Shared async block cipher helpers
- *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on aesni-intel_glue.c by:
- *  Copyright (C) 2008, Intel Corp.
- *    Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <crypto/ablk_helper.h>
-#include <asm/simd.h>
-
-int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-		 unsigned int key_len)
-{
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-EXPORT_SYMBOL_GPL(ablk_set_key);
-
-int __ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct blkcipher_desc desc;
-
-	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-	desc.info = req->info;
-	desc.flags = 0;
-
-	return crypto_blkcipher_crt(desc.tfm)->encrypt(
-		&desc, req->dst, req->src, req->nbytes);
-}
-EXPORT_SYMBOL_GPL(__ablk_encrypt);
-
-int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		return __ablk_encrypt(req);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_encrypt);
-
-int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_decrypt);
-
-void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-EXPORT_SYMBOL_GPL(ablk_exit);
-
-int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
-					     CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ablk_init_common);
-
-int ablk_init(struct crypto_tfm *tfm)
-{
-	char drv_name[CRYPTO_MAX_ALG_NAME];
-
-	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
-					crypto_tfm_alg_driver_name(tfm));
-
-	return ablk_init_common(tfm, drv_name);
-}
-EXPORT_SYMBOL_GPL(ablk_init);
-
-MODULE_LICENSE("GPL");
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 266fc1d64f61..a64c143165b1 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -92,13 +92,14 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
 
 	if (nbytes && walk->offset & alignmask && !err) {
 		walk->offset = ALIGN(walk->offset, alignmask + 1);
-		walk->data += walk->offset;
-
 		nbytes = min(nbytes,
 			     ((unsigned int)(PAGE_SIZE)) - walk->offset);
 		walk->entrylen -= nbytes;
 
-		return nbytes;
+		if (nbytes) {
+			walk->data += walk->offset;
+			return nbytes;
+		}
 	}
 
 	if (walk->flags & CRYPTO_ALG_ASYNC)
@@ -446,24 +447,12 @@ static int ahash_def_finup(struct ahash_request *req)
 	return ahash_def_finup_finish1(req, err);
 }
 
-static int ahash_no_export(struct ahash_request *req, void *out)
-{
-	return -ENOSYS;
-}
-
-static int ahash_no_import(struct ahash_request *req, const void *in)
-{
-	return -ENOSYS;
-}
-
 static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_ahash *hash = __crypto_ahash_cast(tfm);
 	struct ahash_alg *alg = crypto_ahash_alg(hash);
 
 	hash->setkey = ahash_nosetkey;
-	hash->export = ahash_no_export;
-	hash->import = ahash_no_import;
 
 	if (tfm->__crt_alg->cra_type != &crypto_ahash_type)
 		return crypto_init_shash_ops_async(tfm);
@@ -473,16 +462,14 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 	hash->final = alg->final;
 	hash->finup = alg->finup ?: ahash_def_finup;
 	hash->digest = alg->digest;
+	hash->export = alg->export;
+	hash->import = alg->import;
 
 	if (alg->setkey) {
 		hash->setkey = alg->setkey;
 		if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
 			crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 	}
-	if (alg->export)
-		hash->export = alg->export;
-	if (alg->import)
-		hash->import = alg->import;
 
 	return 0;
 }
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 395b082d03a9..2a0271b5f62a 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -543,9 +543,6 @@ int crypto_register_instance(struct crypto_template *tmpl,
 	inst->alg.cra_module = tmpl->module;
 	inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
 
-	if (unlikely(!crypto_mod_get(&inst->alg)))
-		return -EAGAIN;
-
 	down_write(&crypto_alg_sem);
 
 	larval = __crypto_register_alg(&inst->alg);
@@ -563,14 +560,9 @@ unlock:
 		goto err;
 
 	crypto_wait_for_test(larval);
-
-	/* Remove instance if test failed */
-	if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
-		crypto_unregister_instance(inst);
 	err = 0;
 
 err:
-	crypto_mod_put(&inst->alg);
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);
diff --git a/crypto/api.c b/crypto/api.c
index 70a894e52ff3..1d5290c67108 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -193,17 +193,24 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	return alg;
 }
 
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
+					    u32 mask)
 {
 	struct crypto_alg *alg;
+	u32 test = 0;
+
+	if (!((type | mask) & CRYPTO_ALG_TESTED))
+		test |= CRYPTO_ALG_TESTED;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name, type, mask);
+	alg = __crypto_alg_lookup(name, type | test, mask | test);
+	if (!alg && test)
+		alg = __crypto_alg_lookup(name, type, mask) ?
+		      ERR_PTR(-ELIBBAD) : NULL;
 	up_read(&crypto_alg_sem);
 
 	return alg;
 }
-EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
 static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 					       u32 mask)
@@ -227,10 +234,12 @@ static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 		alg = crypto_alg_lookup(name, type, mask);
 	}
 
-	if (alg)
-		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
+	if (!IS_ERR_OR_NULL(alg) && crypto_is_larval(alg))
+		alg = crypto_larval_wait(alg);
+	else if (!alg)
+		alg = crypto_larval_add(name, type, mask);
 
-	return crypto_larval_add(name, type, mask);
+	return alg;
 }
 
 int crypto_probing_notify(unsigned long val, void *v)
@@ -253,11 +262,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 	struct crypto_alg *larval;
 	int ok;
 
-	if (!((type | mask) & CRYPTO_ALG_TESTED)) {
-		type |= CRYPTO_ALG_TESTED;
-		mask |= CRYPTO_ALG_TESTED;
-	}
-
 	/*
 	 * If the internal flag is set for a cipher, require a caller to
 	 * to invoke the cipher with the internal flag to use that cipher.
@@ -485,20 +489,14 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 				   const struct crypto_type *frontend,
 				   u32 type, u32 mask)
 {
-	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask) =
-		crypto_alg_mod_lookup;
-
 	if (frontend) {
 		type &= frontend->maskclear;
 		mask &= frontend->maskclear;
 		type |= frontend->type;
 		mask |= frontend->maskset;
-
-		if (frontend->lookup)
-			lookup = frontend->lookup;
 	}
 
-	return lookup(alg_name, type, mask);
+	return crypto_alg_mod_lookup(alg_name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_find_alg);
 
diff --git a/crypto/cfb.c b/crypto/cfb.c
new file mode 100644
index 000000000000..94ee39bed758
--- /dev/null
+++ b/crypto/cfb.c
@@ -0,0 +1,353 @@
+//SPDX-License-Identifier: GPL-2.0
+/*
+ * CFB: Cipher FeedBack mode
+ *
+ * Copyright (c) 2018 James.Bottomley@HansenPartnership.com
+ *
+ * CFB is a stream cipher mode which is layered on to a block
+ * encryption scheme.  It works very much like a one time pad where
+ * the pad is generated initially from the encrypted IV and then
+ * subsequently from the encrypted previous block of ciphertext.  The
+ * pad is XOR'd into the plain text to get the final ciphertext.
+ *
+ * The scheme of CFB is best described by wikipedia:
+ *
+ * https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#CFB
+ *
+ * Note that since the pad for both encryption and decryption is
+ * generated by an encryption operation, CFB never uses the block
+ * decryption function.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+struct crypto_cfb_ctx {
+	struct crypto_cipher *child;
+};
+
+static unsigned int crypto_cfb_bsize(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	return crypto_cipher_blocksize(child);
+}
+
+static void crypto_cfb_encrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_cipher_encrypt_one(ctx->child, dst, src);
+}
+
+/* final encrypt and decrypt is the same */
+static void crypto_cfb_final(struct skcipher_walk *walk,
+			     struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	u8 tmp[bsize + alignmask];
+	u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+	unsigned int nbytes = walk->nbytes;
+
+	crypto_cfb_encrypt_one(tfm, iv, stream);
+	crypto_xor_cpy(dst, stream, src, nbytes);
+}
+
+static int crypto_cfb_encrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, src, bsize);
+		memcpy(iv, dst, bsize);
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		crypto_xor(src, tmp, bsize);
+		iv = src;
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			err = crypto_cfb_encrypt_inplace(&walk, tfm);
+		else
+			err = crypto_cfb_encrypt_segment(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_decrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, iv, bsize);
+		iv = src;
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		memcpy(iv, src, bsize);
+		crypto_xor(src, tmp, bsize);
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_blocks(struct skcipher_walk *walk,
+				     struct crypto_skcipher *tfm)
+{
+	if (walk->src.virt.addr == walk->dst.virt.addr)
+		return crypto_cfb_decrypt_inplace(walk, tfm);
+	else
+		return crypto_cfb_decrypt_segment(walk, tfm);
+}
+
+static int crypto_cfb_setkey(struct crypto_skcipher *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_cfb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		err = crypto_cfb_decrypt_blocks(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_cfb_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static void crypto_cfb_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
+	struct crypto_alg *alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+	if (err)
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		goto err_free_inst;
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	err = PTR_ERR(alg);
+	if (IS_ERR(alg))
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cfb", alg);
+	if (err)
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	/* we're a stream cipher independend of the crypto cra_blocksize */
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_cfb_ctx);
+
+	inst->alg.init = crypto_cfb_init_tfm;
+	inst->alg.exit = crypto_cfb_exit_tfm;
+
+	inst->alg.setkey = crypto_cfb_setkey;
+	inst->alg.encrypt = crypto_cfb_encrypt;
+	inst->alg.decrypt = crypto_cfb_decrypt;
+
+	inst->free = crypto_cfb_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
+	kfree(inst);
+	goto out;
+}
+
+static struct crypto_template crypto_cfb_tmpl = {
+	.name = "cfb",
+	.create = crypto_cfb_create,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cfb_module_init(void)
+{
+	return crypto_register_template(&crypto_cfb_tmpl);
+}
+
+static void __exit crypto_cfb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cfb_tmpl);
+}
+
+module_init(crypto_cfb_module_init);
+module_exit(crypto_cfb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("cfb");
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 61e7c4e02fd2..992e8d8dcdd9 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -15,13 +15,50 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <crypto/engine.h>
-#include <crypto/internal/hash.h>
 #include <uapi/linux/sched/types.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
 /**
+ * crypto_finalize_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+static void crypto_finalize_request(struct crypto_engine *engine,
+			     struct crypto_async_request *req, int err)
+{
+	unsigned long flags;
+	bool finalize_cur_req = false;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
+
+	spin_lock_irqsave(&engine->queue_lock, flags);
+	if (engine->cur_req == req)
+		finalize_cur_req = true;
+	spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+	if (finalize_cur_req) {
+		enginectx = crypto_tfm_ctx(req->tfm);
+		if (engine->cur_req_prepared &&
+		    enginectx->op.unprepare_request) {
+			ret = enginectx->op.unprepare_request(engine, req);
+			if (ret)
+				dev_err(engine->dev, "failed to unprepare request\n");
+		}
+		spin_lock_irqsave(&engine->queue_lock, flags);
+		engine->cur_req = NULL;
+		engine->cur_req_prepared = false;
+		spin_unlock_irqrestore(&engine->queue_lock, flags);
+	}
+
+	req->complete(req, err);
+
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
+}
+
+/**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
  * @in_kthread: true if we are in the context of the request pump thread
@@ -34,11 +71,10 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 				 bool in_kthread)
 {
 	struct crypto_async_request *async_req, *backlog;
-	struct ahash_request *hreq;
-	struct ablkcipher_request *breq;
 	unsigned long flags;
 	bool was_busy = false;
-	int ret, rtype;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 
@@ -94,7 +130,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
-	rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
 	/* Until here we get the request need to be encrypted successfully */
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
@@ -104,57 +139,31 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		}
 	}
 
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		if (engine->prepare_hash_request) {
-			ret = engine->prepare_hash_request(engine, hreq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->hash_one_request(engine, hreq);
-		if (ret) {
-			dev_err(engine->dev, "failed to hash one request from queue\n");
-			goto req_err;
-		}
-		return;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		if (engine->prepare_cipher_request) {
-			ret = engine->prepare_cipher_request(engine, breq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->cipher_one_request(engine, breq);
+	enginectx = crypto_tfm_ctx(async_req->tfm);
+
+	if (enginectx->op.prepare_request) {
+		ret = enginectx->op.prepare_request(engine, async_req);
 		if (ret) {
-			dev_err(engine->dev, "failed to cipher one request from queue\n");
+			dev_err(engine->dev, "failed to prepare request: %d\n",
+				ret);
 			goto req_err;
 		}
-		return;
-	default:
-		dev_err(engine->dev, "failed to prepare request of unknown type\n");
-		return;
+		engine->cur_req_prepared = true;
+	}
+	if (!enginectx->op.do_one_request) {
+		dev_err(engine->dev, "failed to do request\n");
+		ret = -EINVAL;
+		goto req_err;
 	}
+	ret = enginectx->op.do_one_request(engine, async_req);
+	if (ret) {
+		dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
+		goto req_err;
+	}
+	return;
 
 req_err:
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		crypto_finalize_hash_request(engine, hreq, ret);
-		break;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		crypto_finalize_cipher_request(engine, breq, ret);
-		break;
-	}
+	crypto_finalize_request(engine, async_req, ret);
 	return;
 
 out:
@@ -170,13 +179,12 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 
 /**
- * crypto_transfer_cipher_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_request - transfer the new request into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
-				   struct ablkcipher_request *req,
+static int crypto_transfer_request(struct crypto_engine *engine,
+				   struct crypto_async_request *req,
 				   bool need_pump)
 {
 	unsigned long flags;
@@ -189,7 +197,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 		return -ESHUTDOWN;
 	}
 
-	ret = ablkcipher_enqueue_request(&engine->queue, req);
+	ret = crypto_enqueue_request(&engine->queue, req);
 
 	if (!engine->busy && need_pump)
 		kthread_queue_work(engine->kworker, &engine->pump_requests);
@@ -197,102 +205,131 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
 
 /**
- * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * crypto_transfer_request_to_engine - transfer one request to list
  * into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
-					     struct ablkcipher_request *req)
+static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
+					     struct crypto_async_request *req)
 {
-	return crypto_transfer_cipher_request(engine, req, true);
+	return crypto_transfer_request(engine, req, true);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
 
 /**
- * crypto_transfer_hash_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
+ * to list into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
+ * TODO: Remove this function when skcipher conversion is finished
  */
-int crypto_transfer_hash_request(struct crypto_engine *engine,
-				 struct ahash_request *req, bool need_pump)
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+						 struct ablkcipher_request *req)
 {
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-
-	if (!engine->running) {
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-		return -ESHUTDOWN;
-	}
-
-	ret = ahash_enqueue_request(&engine->queue, req);
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
 
-	if (!engine->busy && need_pump)
-		kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_transfer_aead_request_to_engine - transfer one aead_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+					   struct aead_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_aead_request_to_engine);
 
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-	return ret;
+/**
+ * crypto_transfer_akcipher_request_to_engine - transfer one akcipher_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+					       struct akcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_akcipher_request_to_engine);
 
 /**
- * crypto_transfer_hash_request_to_engine - transfer one request to list
- * into the engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one ahash_request
+ * to list into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  */
 int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
 					   struct ahash_request *req)
 {
-	return crypto_transfer_hash_request(engine, req, true);
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
 EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 /**
- * crypto_finalize_cipher_request - finalize one request if the request is done
+ * crypto_transfer_skcipher_request_to_engine - transfer one skcipher_request
+ * to list into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+					       struct skcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
+
+/**
+ * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
+ * the request is done
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @err: error number
+ * TODO: Remove this function when skcipher conversion is finished
  */
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
-				    struct ablkcipher_request *req, int err)
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+					struct ablkcipher_request *req, int err)
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_cipher_request) {
-			ret = engine->unprepare_cipher_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
 
-	req->base.complete(&req->base, err);
+/**
+ * crypto_finalize_aead_request - finalize one aead_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+				  struct aead_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_aead_request);
 
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_finalize_akcipher_request - finalize one akcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+				      struct akcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_akcipher_request);
 
 /**
- * crypto_finalize_hash_request - finalize one request if the request is done
+ * crypto_finalize_hash_request - finalize one ahash_request if
+ * the request is done
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @err: error number
@@ -300,35 +337,25 @@ EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
 void crypto_finalize_hash_request(struct crypto_engine *engine,
 				  struct ahash_request *req, int err)
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_hash_request) {
-			ret = engine->unprepare_hash_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
-
-	req->base.complete(&req->base, err);
-
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+	return crypto_finalize_request(engine, &req->base, err);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
 /**
+ * crypto_finalize_skcipher_request - finalize one skcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+				      struct skcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_skcipher_request);
+
+/**
  * crypto_engine_start - start the hardware engine
  * @engine: the hardware engine need to be started
  *
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 5c291eedaa70..0e89b5457cab 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -271,7 +271,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		return -ENOENT;
 
 	err = -ENOMEM;
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		goto drop_alg;
 
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 18f32f2a5e1c..9c066b5ac12d 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -1025,9 +1025,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 {
 	int ret = 0;
 	struct ecc_point *product, *pk;
-	u64 priv[ndigits];
-	u64 rand_z[ndigits];
-	unsigned int nbytes;
+	u64 *priv, *rand_z;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
 	if (!private_key || !public_key || !curve) {
@@ -1035,14 +1033,22 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto out;
 	}
 
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-	get_random_bytes(rand_z, nbytes);
+	rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
+	if (!rand_z) {
+		ret = -ENOMEM;
+		goto kfree_out;
+	}
 
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
 		ret = -ENOMEM;
-		goto out;
+		goto kfree_out;
 	}
 
 	product = ecc_alloc_point(ndigits);
@@ -1051,6 +1057,8 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto err_alloc_product;
 	}
 
+	get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
+
 	ecc_swap_digits(public_key, pk->x, ndigits);
 	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
 	ecc_swap_digits(private_key, priv, ndigits);
@@ -1065,6 +1073,9 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 	ecc_free_point(product);
 err_alloc_product:
 	ecc_free_point(pk);
+kfree_out:
+	kzfree(priv);
+	kzfree(rand_z);
 out:
 	return ret;
 }
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 3aca0933ec44..d2ec33f0e098 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -89,12 +89,19 @@ static int ecdh_compute_value(struct kpp_request *req)
 		if (!shared_secret)
 			goto free_pubkey;
 
-		copied = sg_copy_to_buffer(req->src, 1, public_key,
-					   public_key_sz);
-		if (copied != public_key_sz) {
-			ret = -EINVAL;
+		/* from here on it's invalid parameters */
+		ret = -EINVAL;
+
+		/* must have exactly two points to be on the curve */
+		if (public_key_sz != req->src_len)
+			goto free_all;
+
+		copied = sg_copy_to_buffer(req->src,
+					   sg_nents_for_len(req->src,
+							    public_key_sz),
+					   public_key, public_key_sz);
+		if (copied != public_key_sz)
 			goto free_all;
-		}
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 						ctx->private_key, public_key,
@@ -111,7 +118,11 @@ static int ecdh_compute_value(struct kpp_request *req)
 	if (ret < 0)
 		goto free_all;
 
-	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
+	/* might want less than we've got */
+	nbytes = min_t(size_t, nbytes, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
 	if (copied != nbytes)
 		ret = -EINVAL;
 
diff --git a/crypto/internal.h b/crypto/internal.h
index 5ac27fba10e8..9a3f39939fba 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -67,7 +67,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg)
 }
 
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask);
 struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 
 int crypto_init_cipher_ops(struct crypto_tfm *tfm);
diff --git a/crypto/lrw.c b/crypto/lrw.c
index cbbd7c50ad19..954a7064a179 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -28,13 +28,31 @@
 
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
-#include <crypto/lrw.h>
 
 #define LRW_BUFFER_SIZE 128u
 
+#define LRW_BLOCK_SIZE 16
+
 struct priv {
 	struct crypto_skcipher *child;
-	struct lrw_table_ctx table;
+
+	/*
+	 * optimizes multiplying a random (non incrementing, as at the
+	 * start of a new sector) value with key2, we could also have
+	 * used 4k optimization tables or no optimization at all. In the
+	 * latter case we would have to store key2 here
+	 */
+	struct gf128mul_64k *table;
+
+	/*
+	 * stores:
+	 *  key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 },
+	 *  key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 }
+	 *  key2*{ 0,0,...1,1,1,1,1 }, etc
+	 * needed for optimized multiplication of incrementing values
+	 * with key2
+	 */
+	be128 mulinc[128];
 };
 
 struct rctx {
@@ -65,11 +83,25 @@ static inline void setbit128_bbe(void *b, int bit)
 			), b);
 }
 
-int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
+		  unsigned int keylen)
 {
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
+	int err, bsize = LRW_BLOCK_SIZE;
+	const u8 *tweak = key + keylen - bsize;
 	be128 tmp = { 0 };
 	int i;
 
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
 	if (ctx->table)
 		gf128mul_free_64k(ctx->table);
 
@@ -87,34 +119,6 @@ int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(lrw_init_table);
-
-void lrw_free_table(struct lrw_table_ctx *ctx)
-{
-	if (ctx->table)
-		gf128mul_free_64k(ctx->table);
-}
-EXPORT_SYMBOL_GPL(lrw_free_table);
-
-static int setkey(struct crypto_skcipher *parent, const u8 *key,
-		  unsigned int keylen)
-{
-	struct priv *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
-	int err, bsize = LRW_BLOCK_SIZE;
-	const u8 *tweak = key + keylen - bsize;
-
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen - bsize);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->table, tweak);
-}
 
 static inline void inc(be128 *iv)
 {
@@ -238,7 +242,7 @@ static int pre_crypt(struct skcipher_request *req)
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
 			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->table.mulinc[get_index128(iv)]);
+				  &ctx->mulinc[get_index128(iv)]);
 			inc(iv);
 		} while ((avail -= bs) >= bs);
 
@@ -301,7 +305,7 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
 
 	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&rctx->t, ctx->table.table);
+	gf128mul_64k_bbe(&rctx->t, ctx->table);
 
 	return 0;
 }
@@ -313,7 +317,7 @@ static void exit_crypt(struct skcipher_request *req)
 	rctx->left = 0;
 
 	if (rctx->ext)
-		kfree(rctx->ext);
+		kzfree(rctx->ext);
 }
 
 static int do_encrypt(struct skcipher_request *req, int err)
@@ -416,85 +420,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
-int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct lrw_crypt_req *req)
-{
-	const unsigned int bsize = LRW_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct lrw_table_ctx *ctx = req->table_ctx;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	be128 *iv, *src, *dst, *t;
-	be128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(walk.nbytes / bsize, max_blks);
-	src = (be128 *)walk.src.virt.addr;
-	dst = (be128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	iv = (be128 *)walk.iv;
-	t_buf[0] = *iv;
-
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&t_buf[0], ctx->table);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				/* T <- I*Key2, using the optimization
-				 * discussed in the specification */
-				be128_xor(&t_buf[i], t,
-						&ctx->mulinc[get_index128(iv)]);
-				inc(iv);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				be128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				be128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (be128 *)walk.src.virt.addr;
-		dst = (be128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(lrw_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
@@ -518,7 +443,8 @@ static void exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
 
-	lrw_free_table(&ctx->table);
+	if (ctx->table)
+		gf128mul_free_64k(ctx->table);
 	crypto_free_skcipher(ctx->child);
 }
 
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index fe5129d6ff4e..f14152147ce8 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -367,7 +367,7 @@ static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
 		goto out;
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_update(&rctx->areq);
+	err = crypto_ahash_update(&rctx->areq);
 	if (err) {
 		req->base.complete = rctx->complete;
 		goto out;
@@ -394,7 +394,7 @@ static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
 		goto out;
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_final(&rctx->areq);
+	err = crypto_ahash_final(&rctx->areq);
 	if (err) {
 		req->base.complete = rctx->complete;
 		goto out;
@@ -420,7 +420,7 @@ static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 	rctx->out = req->result;
-	err = ahash_mcryptd_finup(&rctx->areq);
+	err = crypto_ahash_finup(&rctx->areq);
 
 	if (err) {
 		req->base.complete = rctx->complete;
@@ -455,7 +455,7 @@ static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
 						rctx->complete, req_async);
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_digest(desc);
+	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
 
 out:
 	local_bh_disable();
@@ -612,32 +612,6 @@ struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
 }
 EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
-int ahash_mcryptd_digest(struct ahash_request *desc)
-{
-	return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
-}
-
-int ahash_mcryptd_update(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_update(desc);
-}
-
-int ahash_mcryptd_finup(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_finup(desc);
-}
-
-int ahash_mcryptd_final(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_final(desc);
-}
-
 struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
 {
 	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
diff --git a/crypto/md4.c b/crypto/md4.c
index 3515af425cc9..810fefb0a007 100644
--- a/crypto/md4.c
+++ b/crypto/md4.c
@@ -64,23 +64,6 @@ static inline u32 H(u32 x, u32 y, u32 z)
 #define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
 #define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 static void md4_transform(u32 *hash, u32 const *in)
 {
 	u32 a, b, c, d;
diff --git a/crypto/md5.c b/crypto/md5.c
index f7ae1a48225b..f776ef43d621 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -32,23 +32,6 @@ const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(md5_zero_message_hash);
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 #define F1(x, y, z)	(z ^ (x & (y ^ z)))
 #define F2(x, y, z)	F1(z, x, y)
 #define F3(x, y, z)	(x ^ y ^ z)
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 2908f93c3e55..9893dbfc1af4 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -192,7 +192,7 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err)
 	if (likely(!pad_len))
 		goto out;
 
-	out_buf = kzalloc(ctx->key_size, GFP_ATOMIC);
+	out_buf = kzalloc(ctx->key_size, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!out_buf)
 		goto out;
diff --git a/crypto/simd.c b/crypto/simd.c
index 208226d7f908..ea7240be3001 100644
--- a/crypto/simd.c
+++ b/crypto/simd.c
@@ -221,4 +221,54 @@ void simd_skcipher_free(struct simd_skcipher_alg *salg)
 }
 EXPORT_SYMBOL_GPL(simd_skcipher_free);
 
+int simd_register_skciphers_compat(struct skcipher_alg *algs, int count,
+				   struct simd_skcipher_alg **simd_algs)
+{
+	int err;
+	int i;
+	const char *algname;
+	const char *drvname;
+	const char *basename;
+	struct simd_skcipher_alg *simd;
+
+	err = crypto_register_skciphers(algs, count);
+	if (err)
+		return err;
+
+	for (i = 0; i < count; i++) {
+		WARN_ON(strncmp(algs[i].base.cra_name, "__", 2));
+		WARN_ON(strncmp(algs[i].base.cra_driver_name, "__", 2));
+		algname = algs[i].base.cra_name + 2;
+		drvname = algs[i].base.cra_driver_name + 2;
+		basename = algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto err_unregister;
+		simd_algs[i] = simd;
+	}
+	return 0;
+
+err_unregister:
+	simd_unregister_skciphers(algs, count, simd_algs);
+	return err;
+}
+EXPORT_SYMBOL_GPL(simd_register_skciphers_compat);
+
+void simd_unregister_skciphers(struct skcipher_alg *algs, int count,
+			       struct simd_skcipher_alg **simd_algs)
+{
+	int i;
+
+	crypto_unregister_skciphers(algs, count);
+
+	for (i = 0; i < count; i++) {
+		if (simd_algs[i]) {
+			simd_skcipher_free(simd_algs[i]);
+			simd_algs[i] = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(simd_unregister_skciphers);
+
 MODULE_LICENSE("GPL");
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
new file mode 100644
index 000000000000..f537a2766c55
--- /dev/null
+++ b/crypto/sm4_generic.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * SM4 Cipher Algorithm.
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ */
+
+#include <crypto/sm4.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+static const u32 fk[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u8 sbox[256] = {
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+static const u32 ck[] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static u32 sm4_t_non_lin_sub(u32 x)
+{
+	int i;
+	u8 *b = (u8 *)&x;
+
+	for (i = 0; i < 4; ++i)
+		b[i] = sbox[b[i]];
+
+	return x;
+}
+
+static u32 sm4_key_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 13) ^ rol32(x, 23);
+
+}
+
+static u32 sm4_enc_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static u32 sm4_key_sub(u32 x)
+{
+	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_enc_sub(u32 x)
+{
+	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_round(const u32 *x, const u32 rk)
+{
+	return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
+}
+
+
+/**
+ * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len)
+{
+	u32 rk[4], t;
+	const u32 *key = (u32 *)in_key;
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < 4; ++i)
+		rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
+
+	for (i = 0; i < 32; ++i) {
+		t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
+		ctx->rkey_enc[i] = t;
+		rk[0] = rk[1];
+		rk[1] = rk[2];
+		rk[2] = rk[3];
+		rk[3] = t;
+	}
+
+	for (i = 0; i < 32; ++i)
+		ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
+
+/**
+ * crypto_sm4_set_key - Set the AES key.
+ * @tfm:	The %crypto_tfm that is used in the context.
+ * @in_key:	The input key.
+ * @key_len:	The size of the key.
+ *
+ * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
+ * is set. The function uses crypto_sm4_expand_key() to expand the key.
+ * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
+ * retrieved with crypto_tfm_ctx().
+ */
+int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int ret;
+
+	ret = crypto_sm4_expand_key(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
+
+static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
+{
+	u32 x[4], i, t;
+
+	for (i = 0; i < 4; ++i)
+		x[i] = get_unaligned_be32(&in[i]);
+
+	for (i = 0; i < 32; ++i) {
+		t = sm4_round(x, rk[i]);
+		x[0] = x[1];
+		x[1] = x[2];
+		x[2] = x[3];
+		x[3] = t;
+	}
+
+	for (i = 0; i < 4; ++i)
+		put_unaligned_be32(x[3 - i], &out[i]);
+}
+
+/* encrypt a block of text */
+
+static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
+}
+
+/* decrypt a block of text */
+
+static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
+}
+
+static struct crypto_alg sm4_alg = {
+	.cra_name		=	"sm4",
+	.cra_driver_name	=	"sm4-generic",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	SM4_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypto_sm4_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	SM4_KEY_SIZE,
+			.cia_max_keysize	=	SM4_KEY_SIZE,
+			.cia_setkey		=	crypto_sm4_set_key,
+			.cia_encrypt		=	sm4_encrypt,
+			.cia_decrypt		=	sm4_decrypt
+		}
+	}
+};
+
+static int __init sm4_init(void)
+{
+	return crypto_register_alg(&sm4_alg);
+}
+
+static void __exit sm4_fini(void)
+{
+	crypto_unregister_alg(&sm4_alg);
+}
+
+module_init(sm4_init);
+module_exit(sm4_fini);
+
+MODULE_DESCRIPTION("SM4 Cipher Algorithm");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-generic");
diff --git a/crypto/speck.c b/crypto/speck.c
new file mode 100644
index 000000000000..58aa9f7f91f7
--- /dev/null
+++ b/crypto/speck.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Speck: a lightweight block cipher
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Speck has 10 variants, including 5 block sizes.  For now we only implement
+ * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
+ * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
+ * and a key size of K bits.  The Speck128 variants are believed to be the most
+ * secure variants, and they use the same block size and key sizes as AES.  The
+ * Speck64 variants are less secure, but on 32-bit processors are usually
+ * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
+ * secure and/or not as well suited for implementation on either 32-bit or
+ * 64-bit processors, so are omitted.
+ *
+ * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * In a correspondence, the Speck designers have also clarified that the words
+ * should be interpreted in little-endian format, and the words should be
+ * ordered such that the first word of each block is 'y' rather than 'x', and
+ * the first key word (rather than the last) becomes the first round key.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/speck.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+/* Speck128 */
+
+static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
+{
+	*x = ror64(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol64(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
+{
+	*y ^= *x;
+	*y = ror64(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol64(*x, 8);
+}
+
+void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck128_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
+
+static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck128_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
+
+static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
+			   unsigned int keylen)
+{
+	u64 l[3];
+	u64 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK128_128_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		ctx->nrounds = SPECK128_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[0], &k, i);
+		}
+		break;
+	case SPECK128_192_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		ctx->nrounds = SPECK128_192_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK128_256_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		l[2] = get_unaligned_le64(key + 24);
+		ctx->nrounds = SPECK128_256_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
+
+static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Speck64 */
+
+static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
+{
+	*x = ror32(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol32(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
+{
+	*y ^= *x;
+	*y = ror32(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol32(*x, 8);
+}
+
+void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck64_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
+
+static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck64_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
+
+static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
+			  unsigned int keylen)
+{
+	u32 l[3];
+	u32 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK64_96_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		ctx->nrounds = SPECK64_96_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK64_128_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		l[2] = get_unaligned_le32(key + 12);
+		ctx->nrounds = SPECK64_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
+
+static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Algorithm definitions */
+
+static struct crypto_alg speck_algs[] = {
+	{
+		.cra_name		= "speck128",
+		.cra_driver_name	= "speck128-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK128_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
+				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
+				.cia_setkey		= speck128_setkey,
+				.cia_encrypt		= speck128_encrypt,
+				.cia_decrypt		= speck128_decrypt
+			}
+		}
+	}, {
+		.cra_name		= "speck64",
+		.cra_driver_name	= "speck64-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK64_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
+				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
+				.cia_setkey		= speck64_setkey,
+				.cia_encrypt		= speck64_encrypt,
+				.cia_decrypt		= speck64_decrypt
+			}
+		}
+	}
+};
+
+static int __init speck_module_init(void)
+{
+	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_module_exit(void)
+{
+	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_module_init);
+module_exit(speck_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (generic)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("speck128");
+MODULE_ALIAS_CRYPTO("speck128-generic");
+MODULE_ALIAS_CRYPTO("speck64");
+MODULE_ALIAS_CRYPTO("speck64-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 14213a096fd2..51fe7c8744ae 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1983,6 +1983,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 	case 190:
 		ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
 		break;
+	case 191:
+		ret += tcrypt_test("ecb(sm4)");
+		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d5e23a142a04..af4a01c5037b 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3001,6 +3001,33 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "ecb(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(sm4_enc_tv_template),
+				.dec = __VECS(sm4_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_enc_tv_template),
+				.dec = __VECS(speck128_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_enc_tv_template),
+				.dec = __VECS(speck64_dec_tv_template)
+			}
+		}
+	}, {
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
 		.suite = {
@@ -3558,6 +3585,24 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "xts(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_xts_enc_tv_template),
+				.dec = __VECS(speck128_xts_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "xts(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_xts_enc_tv_template),
+				.dec = __VECS(speck64_xts_dec_tv_template)
+			}
+		}
+	}, {
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6044f6906bd6..004c0a0f8004 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -548,7 +548,7 @@ static const struct akcipher_testvec rsa_tv_template[] = {
 static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	{
 	.key =
-	"\x30\x82\x03\x1f\x02\x01\x10\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
+	"\x30\x82\x03\x1f\x02\x01\x00\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
 	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
 	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
 	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
@@ -597,8 +597,8 @@ static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
 	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
 	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
-	"\xb8\x35\xdf\x41\x02\x01\x30\x02\x01\x30\x02\x01\x30\x02\x01\x30"
-	"\x02\x01\x30",
+	"\xb8\x35\xdf\x41\x02\x01\x00\x02\x01\x00\x02\x01\x00\x02\x01\x00"
+	"\x02\x01\x00",
 	.key_len = 804,
 	/*
 	 * m is SHA256 hash of following message:
@@ -2044,6 +2044,265 @@ static const struct hash_testvec crct10dif_tv_template[] = {
 		.digest		= (u8 *)(u16 []){ 0x44c6 },
 		.np		= 4,
 		.tap		= { 1, 255, 57, 6 },
+	}, {
+		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
+				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
+				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
+				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
+				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
+				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
+				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
+				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
+				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
+				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
+				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
+				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
+				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
+				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
+				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
+				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
+				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
+				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
+				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
+				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
+				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
+				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
+				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
+				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
+				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
+				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
+				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
+				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
+				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
+				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
+				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
+				"\x47\xde\x75\x0c\x80\x17\xae\x22"
+				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
+				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
+				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
+				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
+				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
+				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
+				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
+				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
+				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
+				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
+				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
+				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
+				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
+				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
+				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
+				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
+				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
+				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
+				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
+				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
+				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
+				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
+				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
+				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
+				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
+				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
+				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
+				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
+				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
+				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
+				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
+				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
+				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
+				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
+				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
+				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
+				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
+				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
+				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
+				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
+				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
+				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
+				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
+				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
+				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
+				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
+				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
+				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
+				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
+				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
+				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
+				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
+				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
+				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
+				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
+				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
+				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
+				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
+				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
+				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
+				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
+				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
+				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
+				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
+				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
+				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
+				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
+				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
+				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
+				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
+				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
+				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
+				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
+				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
+				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
+				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
+				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
+				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
+				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
+				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
+				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
+				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
+				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
+				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
+				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
+				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
+				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
+				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
+				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
+				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
+				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
+				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
+				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
+				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
+				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
+				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
+				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
+				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
+				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
+				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
+				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
+				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
+				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
+				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
+				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
+				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
+				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
+				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
+				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
+				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
+				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
+				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
+				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
+				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
+				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
+				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
+				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
+				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
+				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
+				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
+				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
+				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
+				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
+				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
+				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
+				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
+				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
+				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
+				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
+				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
+				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
+				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
+				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
+				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
+				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
+				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
+				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
+				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
+				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
+				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
+				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
+				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
+				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
+				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
+				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
+				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
+				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
+				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
+				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
+				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
+				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
+				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
+				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
+				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
+				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
+				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
+				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
+				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
+				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
+				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
+				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
+				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
+				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
+				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
+				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
+				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
+				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
+				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
+				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
+				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
+				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
+				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
+				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
+				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
+				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
+				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
+				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
+				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
+				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
+				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
+				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
+				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
+				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
+				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
+				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
+				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
+				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
+				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
+				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
+				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
+				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
+				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
+				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
+				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
+				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
+				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
+				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
+				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
+				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
+				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
+				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
+				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
+				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
+				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
+				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
+				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
+				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
+				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
+				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
+				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
+				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
+				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
+				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
+				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
+				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
+				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
+				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
+				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
+				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
+				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
+				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
+				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
+				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
+				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
+		.psize = 2048,
+		.digest		= (u8 *)(u16 []){ 0x23ca },
 	}
 };
 
@@ -14323,6 +14582,1623 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	},
 };
 
+/*
+ * SM4 test vector taken from the draft RFC
+ * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
+ */
+
+static const struct cipher_testvec sm4_enc_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.ilen	= 16,
+		.result	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
+		.rlen	= 16,
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\xe\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.ilen	= 160,
+		.result	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
+		.rlen	= 160
+	}
+};
+
+static const struct cipher_testvec sm4_dec_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
+		.ilen	= 16,
+		.result	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.rlen	= 16,
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
+		.ilen	= 160,
+		.result	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\xe\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.rlen	= 160
+	}
+};
+
+/*
+ * Speck test vectors taken from the original paper:
+ * "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * Note that the paper does not make byte and word order clear.  But it was
+ * confirmed with the authors that the intended orders are little endian byte
+ * order and (y, x) word order.  Equivalently, the printed test vectors, when
+ * looking at only the bytes (ignoring the whitespace that divides them into
+ * words), are backwards: the left-most byte is actually the one with the
+ * highest memory address, while the right-most byte is actually the one with
+ * the lowest memory address.
+ */
+
+static const struct cipher_testvec speck128_enc_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
+		.ilen	= 16,
+		.result	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
+		.rlen	= 16,
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
+		.ilen	= 16,
+		.result	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.klen	= 32,
+		.input	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
+		.ilen	= 16,
+		.result	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
+		.rlen	= 16,
+	},
+};
+
+static const struct cipher_testvec speck128_dec_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
+		.ilen	= 16,
+		.result	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
+		.rlen	= 16,
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.ilen	= 16,
+		.result	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.klen	= 32,
+		.input	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
+		.ilen	= 16,
+		.result	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
+		.rlen	= 16,
+	},
+};
+
+/*
+ * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
+ * result recomputed with Speck128 as the cipher
+ */
+
+static const struct cipher_testvec speck128_xts_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck128_xts_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck64_enc_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.ilen	= 8,
+		.result	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.ilen	= 8,
+		.result	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.rlen	= 8,
+	},
+};
+
+static const struct cipher_testvec speck64_dec_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.ilen	= 8,
+		.result	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.ilen	= 8,
+		.result	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.rlen	= 8,
+	},
+};
+
+/*
+ * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result
+ * recomputed with Speck64 as the cipher, and key lengths adjusted
+ */
+
+static const struct cipher_testvec speck64_xts_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 24,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 24,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 24,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck64_xts_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 24,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 24,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 24,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
 /* Cast6 test vectors from RFC 2612 */
 static const struct cipher_testvec cast6_enc_tv_template[] = {
 	{
diff --git a/crypto/xts.c b/crypto/xts.c
index f317c48b5e43..12284183bd20 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -357,78 +357,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
-int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct xts_crypt_req *req)
-{
-	const unsigned int bsize = XTS_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	le128 *src, *dst, *t;
-	le128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(nbytes / bsize, max_blks);
-	src = (le128 *)walk.src.virt.addr;
-	dst = (le128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	req->tweak_fn(req->tweak_ctx, (u8 *)&t_buf[0], walk.iv);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				gf128mul_x_ble(&t_buf[i], t);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				le128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				le128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		*(le128 *)walk.iv = *t;
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (le128 *)walk.src.virt.addr;
-		dst = (le128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(xts_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 95255ecfae7c..9e702bc4960f 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -31,11 +31,6 @@
 #define IORT_IOMMU_TYPE		((1 << ACPI_IORT_NODE_SMMU) |	\
 				(1 << ACPI_IORT_NODE_SMMU_V3))
 
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
-#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX		0x2
-#endif
-
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
@@ -366,7 +361,6 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node,
 	return NULL;
 }
 
-#if (ACPI_CA_VERSION > 0x20170929)
 static int iort_get_id_mapping_index(struct acpi_iort_node *node)
 {
 	struct acpi_iort_smmu_v3 *smmu;
@@ -400,12 +394,6 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node)
 		return -EINVAL;
 	}
 }
-#else
-static inline int iort_get_id_mapping_index(struct acpi_iort_node *node)
-{
-	return -EINVAL;
-}
-#endif
 
 static struct acpi_iort_node *iort_node_map_id(struct acpi_iort_node *node,
 					       u32 id_in, u32 *id_out,
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 4d0f571c15f9..d53541e96bee 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -452,3 +452,10 @@ config UML_RANDOM
 	  (check your distro, or download from
 	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
 	  /dev/hwrng and injects the entropy into /dev/random.
+
+config HW_RANDOM_KEYSTONE
+	depends on ARCH_KEYSTONE
+	default HW_RANDOM
+	tristate "TI Keystone NETCP SA Hardware random number generator"
+	help
+	  This option enables Keystone's hardware random generator.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index b780370bd4eb..533e913c93d1 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -38,3 +38,4 @@ obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
 obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
 obj-$(CONFIG_HW_RANDOM_MTK)	+= mtk-rng.o
 obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
+obj-$(CONFIG_HW_RANDOM_KEYSTONE) += ks-sa-rng.o
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index 7a84cec30c3a..6767d965c36c 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -163,6 +163,8 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 
 	/* Clock is optional on most platforms */
 	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	priv->rng.name = pdev->name;
 	priv->rng.init = bcm2835_rng_init;
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index dd1007aecb10..2d1352b67168 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -77,7 +77,7 @@ static int cavium_rng_probe_vf(struct	pci_dev		*pdev,
 }
 
 /* Remove the VF */
-void  cavium_rng_remove_vf(struct pci_dev *pdev)
+static void  cavium_rng_remove_vf(struct pci_dev *pdev)
 {
 	struct cavium_rng *rng;
 
diff --git a/drivers/char/hw_random/cavium-rng.c b/drivers/char/hw_random/cavium-rng.c
index a944e0a47f42..63d6e68c24d2 100644
--- a/drivers/char/hw_random/cavium-rng.c
+++ b/drivers/char/hw_random/cavium-rng.c
@@ -62,7 +62,7 @@ static int cavium_rng_probe(struct pci_dev *pdev,
 }
 
 /* Disable VF and RNG Hardware */
-void  cavium_rng_remove(struct pci_dev *pdev)
+static void cavium_rng_remove(struct pci_dev *pdev)
 {
 	struct cavium_rng_pf *rng;
 
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index eca87249bcff..250123bc4905 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -300,7 +300,7 @@ static int __maybe_unused imx_rngc_resume(struct device *dev)
 	return 0;
 }
 
-SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
+static SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
 
 static const struct of_device_id imx_rngc_dt_ids[] = {
 	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c
new file mode 100644
index 000000000000..62c6696c1dbd
--- /dev/null
+++ b/drivers/char/hw_random/ks-sa-rng.c
@@ -0,0 +1,257 @@
+/*
+ * Random Number Generator driver for the Keystone SOC
+ *
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors:	Sandeep Nair
+ *		Vitaly Andrianov
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+
+#define SA_CMD_STATUS_OFS			0x8
+
+/* TRNG enable control in SA System module*/
+#define SA_CMD_STATUS_REG_TRNG_ENABLE		BIT(3)
+
+/* TRNG start control in TRNG module */
+#define TRNG_CNTL_REG_TRNG_ENABLE		BIT(10)
+
+/* Data ready indicator in STATUS register */
+#define TRNG_STATUS_REG_READY			BIT(0)
+
+/* Data ready clear control in INTACK register */
+#define TRNG_INTACK_REG_READY			BIT(0)
+
+/*
+ * Number of samples taken to gather entropy during startup.
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_STARTUP_CYCLES			0
+#define TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT	16
+
+/*
+ * Minimum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^6.
+ */
+#define TRNG_DEF_MIN_REFILL_CYCLES		1
+#define TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT	0
+
+/*
+ * Maximum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_MAX_REFILL_CYCLES		0
+#define TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT	16
+
+/* Number of CLK input cycles between samples */
+#define TRNG_DEF_CLK_DIV_CYCLES			0
+#define TRNG_CFG_REG_SAMPLE_DIV_SHIFT		8
+
+/* Maximum retries to get rng data */
+#define SA_MAX_RNG_DATA_RETRIES			5
+/* Delay between retries (in usecs) */
+#define SA_RNG_DATA_RETRY_DELAY			5
+
+struct trng_regs {
+	u32	output_l;
+	u32	output_h;
+	u32	status;
+	u32	intmask;
+	u32	intack;
+	u32	control;
+	u32	config;
+};
+
+struct ks_sa_rng {
+	struct device	*dev;
+	struct hwrng	rng;
+	struct clk	*clk;
+	struct regmap	*regmap_cfg;
+	struct trng_regs *reg_rng;
+};
+
+static int ks_sa_rng_init(struct hwrng *rng)
+{
+	u32 value;
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Enable RNG module */
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE);
+
+	/* Configure RNG module */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	value = TRNG_DEF_STARTUP_CYCLES << TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	value =	(TRNG_DEF_MIN_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_MAX_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_CLK_DIV_CYCLES <<
+		 TRNG_CFG_REG_SAMPLE_DIV_SHIFT);
+
+	writel(value, &ks_sa_rng->reg_rng->config);
+
+	/* Disable all interrupts from TRNG */
+	writel(0, &ks_sa_rng->reg_rng->intmask);
+
+	/* Enable RNG */
+	value = readl(&ks_sa_rng->reg_rng->control);
+	value |= TRNG_CNTL_REG_TRNG_ENABLE;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	return 0;
+}
+
+static void ks_sa_rng_cleanup(struct hwrng *rng)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Disable RNG */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE, 0);
+}
+
+static int ks_sa_rng_data_read(struct hwrng *rng, u32 *data)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Read random data */
+	data[0] = readl(&ks_sa_rng->reg_rng->output_l);
+	data[1] = readl(&ks_sa_rng->reg_rng->output_h);
+
+	writel(TRNG_INTACK_REG_READY, &ks_sa_rng->reg_rng->intack);
+
+	return sizeof(u32) * 2;
+}
+
+static int ks_sa_rng_data_present(struct hwrng *rng, int wait)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	u32	ready;
+	int	j;
+
+	for (j = 0; j < SA_MAX_RNG_DATA_RETRIES; j++) {
+		ready = readl(&ks_sa_rng->reg_rng->status);
+		ready &= TRNG_STATUS_REG_READY;
+
+		if (ready || !wait)
+			break;
+
+		udelay(SA_RNG_DATA_RETRY_DELAY);
+	}
+
+	return ready;
+}
+
+static int ks_sa_rng_probe(struct platform_device *pdev)
+{
+	struct ks_sa_rng	*ks_sa_rng;
+	struct device		*dev = &pdev->dev;
+	int			ret;
+	struct resource		*mem;
+
+	ks_sa_rng = devm_kzalloc(dev, sizeof(*ks_sa_rng), GFP_KERNEL);
+	if (!ks_sa_rng)
+		return -ENOMEM;
+
+	ks_sa_rng->dev = dev;
+	ks_sa_rng->rng = (struct hwrng) {
+		.name = "ks_sa_hwrng",
+		.init = ks_sa_rng_init,
+		.data_read = ks_sa_rng_data_read,
+		.data_present = ks_sa_rng_data_present,
+		.cleanup = ks_sa_rng_cleanup,
+	};
+	ks_sa_rng->rng.priv = (unsigned long)dev;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ks_sa_rng->reg_rng = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ks_sa_rng->reg_rng))
+		return PTR_ERR(ks_sa_rng->reg_rng);
+
+	ks_sa_rng->regmap_cfg =
+		syscon_regmap_lookup_by_phandle(dev->of_node,
+						"ti,syscon-sa-cfg");
+
+	if (IS_ERR(ks_sa_rng->regmap_cfg)) {
+		dev_err(dev, "syscon_node_to_regmap failed\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable SA power-domain\n");
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ks_sa_rng);
+
+	return devm_hwrng_register(&pdev->dev, &ks_sa_rng->rng);
+}
+
+static int ks_sa_rng_remove(struct platform_device *pdev)
+{
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id ks_sa_rng_dt_match[] = {
+	{
+		.compatible = "ti,keystone-rng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ks_sa_rng_dt_match);
+
+static struct platform_driver ks_sa_rng_driver = {
+	.driver		= {
+		.name	= "ks-sa-rng",
+		.of_match_table = ks_sa_rng_dt_match,
+	},
+	.probe		= ks_sa_rng_probe,
+	.remove		= ks_sa_rng_remove,
+};
+
+module_platform_driver(ks_sa_rng_driver);
+
+MODULE_DESCRIPTION("Keystone NETCP SA H/W Random Number Generator driver");
+MODULE_AUTHOR("Vitaly Andrianov <vitalya@ti.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
index 467362262651..f83bee513d91 100644
--- a/drivers/char/hw_random/mxc-rnga.c
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -16,16 +16,13 @@
  * This driver is based on other RNG drivers.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
 #include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
 #include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 /* RNGA Registers */
 #define RNGA_CONTROL			0x00
@@ -197,10 +194,18 @@ static int __exit mxc_rnga_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id mxc_rnga_of_match[] = {
+	{ .compatible = "fsl,imx21-rnga", },
+	{ .compatible = "fsl,imx31-rnga", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mxc_rnga_of_match);
+
 static struct platform_driver mxc_rnga_driver = {
 	.driver = {
-		   .name = "mxc_rnga",
-		   },
+		.name = "mxc_rnga",
+		.of_match_table = mxc_rnga_of_match,
+	},
 	.remove = __exit_p(mxc_rnga_remove),
 };
 
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 74d11ae6abe9..b65ff6962899 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -150,6 +150,7 @@ struct omap_rng_dev {
 	const struct omap_rng_pdata	*pdata;
 	struct hwrng rng;
 	struct clk 			*clk;
+	struct clk			*clk_reg;
 };
 
 static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg)
@@ -480,6 +481,19 @@ static int omap_rng_probe(struct platform_device *pdev)
 		}
 	}
 
+	priv->clk_reg = devm_clk_get(&pdev->dev, "reg");
+	if (IS_ERR(priv->clk_reg) && PTR_ERR(priv->clk_reg) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+	if (!IS_ERR(priv->clk_reg)) {
+		ret = clk_prepare_enable(priv->clk_reg);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Unable to enable the register clk: %d\n",
+				ret);
+			goto err_register;
+		}
+	}
+
 	ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
 				get_omap_rng_device_details(priv);
 	if (ret)
@@ -499,8 +513,8 @@ err_register:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);
+	clk_disable_unprepare(priv->clk);
 err_ioremap:
 	dev_err(dev, "initialization failed.\n");
 	return ret;
@@ -517,8 +531,8 @@ static int omap_rng_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);
 
 	return 0;
 }
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index 63d84e6f1891..0d2328da3b76 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -16,15 +16,18 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 
 #define RNG_CR 0x00
 #define RNG_CR_RNGEN BIT(2)
+#define RNG_CR_CED BIT(5)
 
 #define RNG_SR 0x04
 #define RNG_SR_SEIS BIT(6)
@@ -33,19 +36,12 @@
 
 #define RNG_DR 0x08
 
-/*
- * It takes 40 cycles @ 48MHz to generate each random number (e.g. <1us).
- * At the time of writing STM32 parts max out at ~200MHz meaning a timeout
- * of 500 leaves us a very comfortable margin for error. The loop to which
- * the timeout applies takes at least 4 instructions per iteration so the
- * timeout is enough to take us up to multi-GHz parts!
- */
-#define RNG_TIMEOUT 500
-
 struct stm32_rng_private {
 	struct hwrng rng;
 	void __iomem *base;
 	struct clk *clk;
+	struct reset_control *rst;
+	bool ced;
 };
 
 static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
@@ -59,13 +55,16 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 
 	while (max > sizeof(u32)) {
 		sr = readl_relaxed(priv->base + RNG_SR);
+		/* Manage timeout which is based on timer and take */
+		/* care of initial delay time when enabling rng	*/
 		if (!sr && wait) {
-			unsigned int timeout = RNG_TIMEOUT;
-
-			do {
-				cpu_relax();
-				sr = readl_relaxed(priv->base + RNG_SR);
-			} while (!sr && --timeout);
+			retval = readl_relaxed_poll_timeout_atomic(priv->base
+								   + RNG_SR,
+								   sr, sr,
+								   10, 50000);
+			if (retval)
+				dev_err((struct device *)priv->rng.priv,
+					"%s: timeout %x!\n", __func__, sr);
 		}
 
 		/* If error detected or data not ready... */
@@ -99,7 +98,11 @@ static int stm32_rng_init(struct hwrng *rng)
 	if (err)
 		return err;
 
-	writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	if (priv->ced)
+		writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	else
+		writel_relaxed(RNG_CR_RNGEN | RNG_CR_CED,
+			       priv->base + RNG_CR);
 
 	/* clear error indicators */
 	writel_relaxed(0, priv->base + RNG_SR);
@@ -140,6 +143,15 @@ static int stm32_rng_probe(struct platform_device *ofdev)
 	if (IS_ERR(priv->clk))
 		return PTR_ERR(priv->clk);
 
+	priv->rst = devm_reset_control_get(&ofdev->dev, NULL);
+	if (!IS_ERR(priv->rst)) {
+		reset_control_assert(priv->rst);
+		udelay(2);
+		reset_control_deassert(priv->rst);
+	}
+
+	priv->ced = of_property_read_bool(np, "clock-error-detect");
+
 	dev_set_drvdata(dev, priv);
 
 	priv->rng.name = dev_driver_string(dev),
diff --git a/drivers/char/random.c b/drivers/char/random.c
index e5b3d3ba4660..e027e7fa1472 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -709,7 +709,8 @@ retry:
 		}
 
 		/* should we wake readers? */
-		if (entropy_bits >= random_read_wakeup_bits) {
+		if (entropy_bits >= random_read_wakeup_bits &&
+		    wq_has_sleeper(&random_read_wait)) {
 			wake_up_interruptible(&random_read_wait);
 			kill_fasync(&fasync, SIGIO, POLL_IN);
 		}
@@ -732,7 +733,7 @@ retry:
 
 static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
 {
-	const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
+	const int nbits_max = r->poolinfo->poolwords * 32;
 
 	if (nbits < 0)
 		return -EINVAL;
@@ -963,7 +964,6 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 struct timer_rand_state {
 	cycles_t last_time;
 	long last_delta, last_delta2;
-	unsigned dont_count_entropy:1;
 };
 
 #define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
@@ -1029,35 +1029,33 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 	 * We take into account the first, second and third-order deltas
 	 * in order to make our estimate.
 	 */
+	delta = sample.jiffies - state->last_time;
+	state->last_time = sample.jiffies;
+
+	delta2 = delta - state->last_delta;
+	state->last_delta = delta;
+
+	delta3 = delta2 - state->last_delta2;
+	state->last_delta2 = delta2;
+
+	if (delta < 0)
+		delta = -delta;
+	if (delta2 < 0)
+		delta2 = -delta2;
+	if (delta3 < 0)
+		delta3 = -delta3;
+	if (delta > delta2)
+		delta = delta2;
+	if (delta > delta3)
+		delta = delta3;
 
-	if (!state->dont_count_entropy) {
-		delta = sample.jiffies - state->last_time;
-		state->last_time = sample.jiffies;
-
-		delta2 = delta - state->last_delta;
-		state->last_delta = delta;
-
-		delta3 = delta2 - state->last_delta2;
-		state->last_delta2 = delta2;
-
-		if (delta < 0)
-			delta = -delta;
-		if (delta2 < 0)
-			delta2 = -delta2;
-		if (delta3 < 0)
-			delta3 = -delta3;
-		if (delta > delta2)
-			delta = delta2;
-		if (delta > delta3)
-			delta = delta3;
+	/*
+	 * delta is now minimum absolute delta.
+	 * Round down by 1 bit on general principles,
+	 * and limit entropy entimate to 12 bits.
+	 */
+	credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
 
-		/*
-		 * delta is now minimum absolute delta.
-		 * Round down by 1 bit on general principles,
-		 * and limit entropy entimate to 12 bits.
-		 */
-		credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
-	}
 	preempt_enable();
 }
 
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 4b741b83e23f..d1ea1a07cecb 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -464,13 +464,6 @@ if CRYPTO_DEV_UX500
 	source "drivers/crypto/ux500/Kconfig"
 endif # if CRYPTO_DEV_UX500
 
-config CRYPTO_DEV_BFIN_CRC
-	tristate "Support for Blackfin CRC hardware"
-	depends on BF60x
-	help
-	  Newer Blackfin processors have CRC hardware. Select this if you
-	  want to use the Blackfin CRC module.
-
 config CRYPTO_DEV_ATMEL_AUTHENC
 	tristate "Support for Atmel IPSEC/SSL hw accelerator"
 	depends on HAS_DMA
@@ -730,4 +723,31 @@ config CRYPTO_DEV_ARTPEC6
 
 	  To compile this driver as a module, choose M here.
 
+config CRYPTO_DEV_CCREE
+	tristate "Support for ARM TrustZone CryptoCell family of security processors"
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
+	default n
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_DES
+	select CRYPTO_AEAD
+	select CRYPTO_AUTHENC
+	select CRYPTO_SHA1
+	select CRYPTO_MD5
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_HMAC
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_CTR
+	select CRYPTO_XTS
+	help
+	  Say 'Y' to enable a driver for the REE interface of the Arm
+	  TrustZone CryptoCell family of processors. Currently the
+	  CryptoCell 712, 710 and 630 are supported.
+	  Choose this if you wish to use hardware acceleration of
+	  cryptographic operations on the system REE.
+	  If unsure say Y.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 2513d13ea2c4..7ae87b4f6c8d 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -3,9 +3,9 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
-obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
+obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
 obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 691c6465b71e..801aeab5ab1e 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2155,7 +2155,7 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,
 
 badkey:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	memzero_explicit(&key, sizeof(keys));
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
@@ -2602,16 +2602,13 @@ static struct crypto_platform_data *atmel_aes_of_init(struct platform_device *pd
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
 	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
 		devm_kfree(&pdev->dev, pdata);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -2649,7 +2646,6 @@ static int atmel_aes_probe(struct platform_device *pdev)
 
 	aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL);
 	if (aes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto aes_dd_err;
 	}
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 8874aa5ca0f7..4d43081120db 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2726,18 +2726,14 @@ static struct crypto_platform_data *atmel_sha_of_init(struct platform_device *pd
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	return pdata;
 }
@@ -2758,7 +2754,6 @@ static int atmel_sha_probe(struct platform_device *pdev)
 
 	sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL);
 	if (sha_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto sha_dd_err;
 	}
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 592124f8382b..97b0423efa7f 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1312,18 +1312,14 @@ static struct crypto_platform_data *atmel_tdes_of_init(struct platform_device *p
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	return pdata;
 }
@@ -1344,7 +1340,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 
 	tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL);
 	if (tdes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto tdes_dd_err;
 	}
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 2b75f95bbe1b..309c67c7012f 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -818,7 +818,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx)
 
 	/* AES hashing keeps key size in type field, so need to copy it here */
 	if (hash_parms.alg == HASH_ALG_AES)
-		hash_parms.type = cipher_parms.type;
+		hash_parms.type = (enum hash_type)cipher_parms.type;
 	else
 		hash_parms.type = spu->spu_hash_type(rctx->total_sent);
 
@@ -1409,7 +1409,7 @@ static int handle_aead_req(struct iproc_reqctx_s *rctx)
 						rctx->iv_ctr_len);
 
 	if (ctx->auth.alg == HASH_ALG_AES)
-		hash_parms.type = ctx->cipher_type;
+		hash_parms.type = (enum hash_type)ctx->cipher_type;
 
 	/* General case AAD padding (CCM and RFC4543 special cases below) */
 	aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,
diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c
index d543c010ccd9..a912c6ad3e85 100644
--- a/drivers/crypto/bcm/util.c
+++ b/drivers/crypto/bcm/util.c
@@ -279,7 +279,6 @@ int do_shash(unsigned char *name, unsigned char *result,
 	sdesc = kmalloc(size, GFP_KERNEL);
 	if (!sdesc) {
 		rc = -ENOMEM;
-		pr_err("%s: Memory allocation failure\n", __func__);
 		goto do_shash_err;
 	}
 	sdesc->shash.tfm = hash;
diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c
deleted file mode 100644
index bfbf8bf77f03..000000000000
--- a/drivers/crypto/bfin_crc.c
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Support Blackfin CRC HW acceleration.
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/err.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/delay.h>
-#include <linux/crypto.h>
-#include <linux/cryptohash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/algapi.h>
-#include <crypto/hash.h>
-#include <crypto/internal/hash.h>
-#include <asm/unaligned.h>
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-#include <asm/io.h>
-
-#include "bfin_crc.h"
-
-#define CRC_CCRYPTO_QUEUE_LENGTH	5
-
-#define DRIVER_NAME "bfin-hmac-crc"
-#define CHKSUM_DIGEST_SIZE      4
-#define CHKSUM_BLOCK_SIZE       1
-
-#define CRC_MAX_DMA_DESC	100
-
-#define CRC_CRYPTO_STATE_UPDATE		1
-#define CRC_CRYPTO_STATE_FINALUPDATE	2
-#define CRC_CRYPTO_STATE_FINISH		3
-
-struct bfin_crypto_crc {
-	struct list_head	list;
-	struct device		*dev;
-	spinlock_t		lock;
-
-	int			irq;
-	int			dma_ch;
-	u32			poly;
-	struct crc_register	*regs;
-
-	struct ahash_request	*req; /* current request in operation */
-	struct dma_desc_array	*sg_cpu; /* virt addr of sg dma descriptors */
-	dma_addr_t		sg_dma; /* phy addr of sg dma descriptors */
-	u8			*sg_mid_buf;
-	dma_addr_t		sg_mid_dma; /* phy addr of sg mid buffer */
-
-	struct tasklet_struct	done_task;
-	struct crypto_queue	queue; /* waiting requests */
-
-	u8			busy:1; /* crc device in operation flag */
-};
-
-static struct bfin_crypto_crc_list {
-	struct list_head	dev_list;
-	spinlock_t		lock;
-} crc_list;
-
-struct bfin_crypto_crc_reqctx {
-	struct bfin_crypto_crc	*crc;
-
-	unsigned int		total;	/* total request bytes */
-	size_t			sg_buflen; /* bytes for this update */
-	unsigned int		sg_nents;
-	struct scatterlist	*sg; /* sg list head for this update*/
-	struct scatterlist	bufsl[2]; /* chained sg list */
-
-	size_t			bufnext_len;
-	size_t			buflast_len;
-	u8			bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next udpate */
-	u8			buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last udpate */
-
-	u8			flag;
-};
-
-struct bfin_crypto_crc_ctx {
-	struct bfin_crypto_crc	*crc;
-	u32			key;
-};
-
-/*
- * get element in scatter list by given index
- */
-static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents,
-				unsigned int index)
-{
-	struct scatterlist *sg = NULL;
-	int i;
-
-	for_each_sg(sg_list, sg, nents, i)
-		if (i == index)
-			break;
-
-	return sg;
-}
-
-static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key)
-{
-	writel(0, &crc->regs->datacntrld);
-	writel(MODE_CALC_CRC << OPMODE_OFFSET, &crc->regs->control);
-	writel(key, &crc->regs->curresult);
-
-	/* setup CRC interrupts */
-	writel(CMPERRI | DCNTEXPI, &crc->regs->status);
-	writel(CMPERRI | DCNTEXPI, &crc->regs->intrenset);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-	struct bfin_crypto_crc *crc;
-
-	dev_dbg(ctx->crc->dev, "crc_init\n");
-	spin_lock_bh(&crc_list.lock);
-	list_for_each_entry(crc, &crc_list.dev_list, list) {
-		crc_ctx->crc = crc;
-		break;
-	}
-	spin_unlock_bh(&crc_list.lock);
-
-	if (sg_nents(req->src) > CRC_MAX_DMA_DESC) {
-		dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n",
-			CRC_MAX_DMA_DESC);
-		return -EINVAL;
-	}
-
-	ctx->crc = crc;
-	ctx->bufnext_len = 0;
-	ctx->buflast_len = 0;
-	ctx->sg_buflen = 0;
-	ctx->total = 0;
-	ctx->flag = 0;
-
-	/* init crc results */
-	put_unaligned_le32(crc_ctx->key, req->result);
-
-	dev_dbg(ctx->crc->dev, "init: digest size: %d\n",
-		crypto_ahash_digestsize(tfm));
-
-	return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
-}
-
-static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc)
-{
-	struct scatterlist *sg;
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req);
-	int i = 0, j = 0;
-	unsigned long dma_config;
-	unsigned int dma_count;
-	unsigned int dma_addr;
-	unsigned int mid_dma_count = 0;
-	int dma_mod;
-
-	dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE);
-
-	for_each_sg(ctx->sg, sg, ctx->sg_nents, j) {
-		dma_addr = sg_dma_address(sg);
-		/* deduce extra bytes in last sg */
-		if (sg_is_last(sg))
-			dma_count = sg_dma_len(sg) - ctx->bufnext_len;
-		else
-			dma_count = sg_dma_len(sg);
-
-		if (mid_dma_count) {
-			/* Append last middle dma buffer to 4 bytes with first
-			   bytes in current sg buffer. Move addr of current
-			   sg and deduce the length of current sg.
-			 */
-			memcpy(crc->sg_mid_buf +(i << 2) + mid_dma_count,
-				sg_virt(sg),
-				CHKSUM_DIGEST_SIZE - mid_dma_count);
-			dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count;
-			dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count;
-
-			dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 |
-				DMAEN | PSIZE_32 | WDSIZE_32;
-
-			/* setup new dma descriptor for next middle dma */
-			crc->sg_cpu[i].start_addr = crc->sg_mid_dma + (i << 2);
-			crc->sg_cpu[i].cfg = dma_config;
-			crc->sg_cpu[i].x_count = 1;
-			crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-			dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-				"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-				i, crc->sg_cpu[i].start_addr,
-				crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-				crc->sg_cpu[i].x_modify);
-			i++;
-		}
-
-		dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32;
-		/* chop current sg dma len to multiple of 32 bits */
-		mid_dma_count = dma_count % 4;
-		dma_count &= ~0x3;
-
-		if (dma_addr % 4 == 0) {
-			dma_config |= WDSIZE_32;
-			dma_count >>= 2;
-			dma_mod = 4;
-		} else if (dma_addr % 2 == 0) {
-			dma_config |= WDSIZE_16;
-			dma_count >>= 1;
-			dma_mod = 2;
-		} else {
-			dma_config |= WDSIZE_8;
-			dma_mod = 1;
-		}
-
-		crc->sg_cpu[i].start_addr = dma_addr;
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = dma_count;
-		crc->sg_cpu[i].x_modify = dma_mod;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-
-		if (mid_dma_count) {
-			/* copy extra bytes to next middle dma buffer */
-			memcpy(crc->sg_mid_buf + (i << 2),
-				(u8*)sg_virt(sg) + (dma_count << 2),
-				mid_dma_count);
-		}
-	}
-
-	dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32;
-	/* For final update req, append the buffer for next update as well*/
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH)) {
-		crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext,
-						CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = 1;
-		crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-	}
-
-	if (i == 0)
-		return;
-
-	/* Set the last descriptor to stop mode */
-	crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
-	crc->sg_cpu[i - 1].cfg |= DI_EN;
-	set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
-	set_dma_x_count(crc->dma_ch, 0);
-	set_dma_x_modify(crc->dma_ch, 0);
-	set_dma_config(crc->dma_ch, dma_config);
-}
-
-static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
-				  struct ahash_request *req)
-{
-	struct crypto_async_request *async_req, *backlog;
-	struct bfin_crypto_crc_reqctx *ctx;
-	struct scatterlist *sg;
-	int ret = 0;
-	int nsg, i, j;
-	unsigned int nextlen;
-	unsigned long flags;
-	u32 reg;
-
-	spin_lock_irqsave(&crc->lock, flags);
-	if (req)
-		ret = ahash_enqueue_request(&crc->queue, req);
-	if (crc->busy) {
-		spin_unlock_irqrestore(&crc->lock, flags);
-		return ret;
-	}
-	backlog = crypto_get_backlog(&crc->queue);
-	async_req = crypto_dequeue_request(&crc->queue);
-	if (async_req)
-		crc->busy = 1;
-	spin_unlock_irqrestore(&crc->lock, flags);
-
-	if (!async_req)
-		return ret;
-
-	if (backlog)
-		backlog->complete(backlog, -EINPROGRESS);
-
-	req = ahash_request_cast(async_req);
-	crc->req = req;
-	ctx = ahash_request_ctx(req);
-	ctx->sg = NULL;
-	ctx->sg_buflen = 0;
-	ctx->sg_nents = 0;
-
-	dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n",
-						ctx->flag, req->nbytes);
-
-	if (ctx->flag == CRC_CRYPTO_STATE_FINISH) {
-		if (ctx->bufnext_len == 0) {
-			crc->busy = 0;
-			return 0;
-		}
-
-		/* Pack last crc update buffer to 32bit */
-		memset(ctx->bufnext + ctx->bufnext_len, 0,
-				CHKSUM_DIGEST_SIZE - ctx->bufnext_len);
-	} else {
-		/* Pack small data which is less than 32bit to buffer for next update. */
-		if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) {
-			memcpy(ctx->bufnext + ctx->bufnext_len,
-				sg_virt(req->src), req->nbytes);
-			ctx->bufnext_len += req->nbytes;
-			if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE &&
-				ctx->bufnext_len) {
-				goto finish_update;
-			} else {
-				crc->busy = 0;
-				return 0;
-			}
-		}
-
-		if (ctx->bufnext_len) {
-			/* Chain in extra bytes of last update */
-			ctx->buflast_len = ctx->bufnext_len;
-			memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len);
-
-			nsg = ctx->sg_buflen ? 2 : 1;
-			sg_init_table(ctx->bufsl, nsg);
-			sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len);
-			if (nsg > 1)
-				sg_chain(ctx->bufsl, nsg, req->src);
-			ctx->sg = ctx->bufsl;
-		} else
-			ctx->sg = req->src;
-
-		/* Chop crc buffer size to multiple of 32 bit */
-		nsg = sg_nents(ctx->sg);
-		ctx->sg_nents = nsg;
-		ctx->sg_buflen = ctx->buflast_len + req->nbytes;
-		ctx->bufnext_len = ctx->sg_buflen % 4;
-		ctx->sg_buflen &= ~0x3;
-
-		if (ctx->bufnext_len) {
-			/* copy extra bytes to buffer for next update */
-			memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE);
-			nextlen = ctx->bufnext_len;
-			for (i = nsg - 1; i >= 0; i--) {
-				sg = sg_get(ctx->sg, nsg, i);
-				j = min(nextlen, sg_dma_len(sg));
-				memcpy(ctx->bufnext + nextlen - j,
-					sg_virt(sg) + sg_dma_len(sg) - j, j);
-				if (j == sg_dma_len(sg))
-					ctx->sg_nents--;
-				nextlen -= j;
-				if (nextlen == 0)
-					break;
-			}
-		}
-	}
-
-finish_update:
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH))
-		ctx->sg_buflen += CHKSUM_DIGEST_SIZE;
-
-	/* set CRC data count before start DMA */
-	writel(ctx->sg_buflen >> 2, &crc->regs->datacnt);
-
-	/* setup and enable CRC DMA */
-	bfin_crypto_crc_config_dma(crc);
-
-	/* finally kick off CRC operation */
-	reg = readl(&crc->regs->control);
-	writel(reg | BLKEN, &crc->regs->control);
-
-	return -EINPROGRESS;
-}
-
-static int bfin_crypto_crc_update(struct ahash_request *req)
-{
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	if (!req->nbytes)
-		return 0;
-
-	dev_dbg(ctx->crc->dev, "crc_update\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_UPDATE;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_final(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_final\n");
-	ctx->flag = CRC_CRYPTO_STATE_FINISH;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_finup(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_finishupdate\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_FINALUPDATE;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_digest(struct ahash_request *req)
-{
-	int ret;
-
-	ret = bfin_crypto_crc_init(req);
-	if (ret)
-		return ret;
-
-	return bfin_crypto_crc_finup(req);
-}
-
-static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-
-	dev_dbg(crc_ctx->crc->dev, "crc_setkey\n");
-	if (keylen != CHKSUM_DIGEST_SIZE) {
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	crc_ctx->key = get_unaligned_le32(key);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_tfm_ctx(tfm);
-
-	crc_ctx->key = 0;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct bfin_crypto_crc_reqctx));
-
-	return 0;
-}
-
-static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
-static struct ahash_alg algs = {
-	.init		= bfin_crypto_crc_init,
-	.update		= bfin_crypto_crc_update,
-	.final		= bfin_crypto_crc_final,
-	.finup		= bfin_crypto_crc_finup,
-	.digest		= bfin_crypto_crc_digest,
-	.setkey		= bfin_crypto_crc_setkey,
-	.halg.digestsize	= CHKSUM_DIGEST_SIZE,
-	.halg.base	= {
-		.cra_name		= "hmac(crc32)",
-		.cra_driver_name	= DRIVER_NAME,
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-						CRYPTO_ALG_ASYNC |
-						CRYPTO_ALG_OPTIONAL_KEY,
-		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct bfin_crypto_crc_ctx),
-		.cra_alignmask		= 3,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= bfin_crypto_crc_cra_init,
-		.cra_exit		= bfin_crypto_crc_cra_exit,
-	}
-};
-
-static void bfin_crypto_crc_done_task(unsigned long data)
-{
-	struct bfin_crypto_crc *crc = (struct bfin_crypto_crc *)data;
-
-	bfin_crypto_crc_handle_queue(crc, NULL);
-}
-
-static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id)
-{
-	struct bfin_crypto_crc *crc = dev_id;
-	u32 reg;
-
-	if (readl(&crc->regs->status) & DCNTEXP) {
-		writel(DCNTEXP, &crc->regs->status);
-
-		/* prepare results */
-		put_unaligned_le32(readl(&crc->regs->result),
-			crc->req->result);
-
-		reg = readl(&crc->regs->control);
-		writel(reg & ~BLKEN, &crc->regs->control);
-		crc->busy = 0;
-
-		if (crc->req->base.complete)
-			crc->req->base.complete(&crc->req->base, 0);
-
-		tasklet_schedule(&crc->done_task);
-
-		return IRQ_HANDLED;
-	} else
-		return IRQ_NONE;
-}
-
-#ifdef CONFIG_PM
-/**
- *	bfin_crypto_crc_suspend - suspend crc device
- *	@pdev: device being suspended
- *	@state: requested suspend state
- */
-static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-	int i = 100000;
-
-	while ((readl(&crc->regs->control) & BLKEN) && --i)
-		cpu_relax();
-
-	if (i == 0)
-		return -EBUSY;
-
-	return 0;
-}
-#else
-# define bfin_crypto_crc_suspend NULL
-#endif
-
-#define bfin_crypto_crc_resume NULL
-
-/**
- *	bfin_crypto_crc_probe - Initialize module
- *
- */
-static int bfin_crypto_crc_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct resource *res;
-	struct bfin_crypto_crc *crc;
-	unsigned int timeout = 100000;
-	int ret;
-
-	crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL);
-	if (!crc) {
-		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
-		return -ENOMEM;
-	}
-
-	crc->dev = dev;
-
-	INIT_LIST_HEAD(&crc->list);
-	spin_lock_init(&crc->lock);
-	tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc);
-	crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	crc->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR((void *)crc->regs)) {
-		dev_err(&pdev->dev, "Cannot map CRC IO\n");
-		return PTR_ERR((void *)crc->regs);
-	}
-
-	crc->irq = platform_get_irq(pdev, 0);
-	if (crc->irq < 0) {
-		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
-		return -ENOENT;
-	}
-
-	ret = devm_request_irq(dev, crc->irq, bfin_crypto_crc_handler,
-			IRQF_SHARED, dev_name(dev), crc);
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
-		return ret;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
-		return -ENOENT;
-	}
-	crc->dma_ch = res->start;
-
-	ret = request_dma(crc->dma_ch, dev_name(dev));
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
-		return ret;
-	}
-
-	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
-	if (crc->sg_cpu == NULL) {
-		ret = -ENOMEM;
-		goto out_error_dma;
-	}
-	/*
-	 * need at most CRC_MAX_DMA_DESC sg + CRC_MAX_DMA_DESC middle  +
-	 * 1 last + 1 next dma descriptors
-	 */
-	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));
-	crc->sg_mid_dma = crc->sg_dma + sizeof(struct dma_desc_array)
-			* ((CRC_MAX_DMA_DESC + 1) << 1);
-
-	writel(0, &crc->regs->control);
-	crc->poly = (u32)pdev->dev.platform_data;
-	writel(crc->poly, &crc->regs->poly);
-
-	while (!(readl(&crc->regs->status) & LUTDONE) && (--timeout) > 0)
-		cpu_relax();
-
-	if (timeout == 0)
-		dev_info(&pdev->dev, "init crc poly timeout\n");
-
-	platform_set_drvdata(pdev, crc);
-
-	spin_lock(&crc_list.lock);
-	list_add(&crc->list, &crc_list.dev_list);
-	spin_unlock(&crc_list.lock);
-
-	if (list_is_singular(&crc_list.dev_list)) {
-		ret = crypto_register_ahash(&algs);
-		if (ret) {
-			dev_err(&pdev->dev,
-				"Can't register crypto ahash device\n");
-			goto out_error_dma;
-		}
-	}
-
-	dev_info(&pdev->dev, "initialized\n");
-
-	return 0;
-
-out_error_dma:
-	if (crc->sg_cpu)
-		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
-	free_dma(crc->dma_ch);
-
-	return ret;
-}
-
-/**
- *	bfin_crypto_crc_remove - Initialize module
- *
- */
-static int bfin_crypto_crc_remove(struct platform_device *pdev)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-
-	if (!crc)
-		return -ENODEV;
-
-	spin_lock(&crc_list.lock);
-	list_del(&crc->list);
-	spin_unlock(&crc_list.lock);
-
-	crypto_unregister_ahash(&algs);
-	tasklet_kill(&crc->done_task);
-	free_dma(crc->dma_ch);
-
-	return 0;
-}
-
-static struct platform_driver bfin_crypto_crc_driver = {
-	.probe     = bfin_crypto_crc_probe,
-	.remove    = bfin_crypto_crc_remove,
-	.suspend   = bfin_crypto_crc_suspend,
-	.resume    = bfin_crypto_crc_resume,
-	.driver    = {
-		.name  = DRIVER_NAME,
-	},
-};
-
-/**
- *	bfin_crypto_crc_mod_init - Initialize module
- *
- *	Checks the module params and registers the platform driver.
- *	Real work is in the platform probe function.
- */
-static int __init bfin_crypto_crc_mod_init(void)
-{
-	int ret;
-
-	pr_info("Blackfin hardware CRC crypto driver\n");
-
-	INIT_LIST_HEAD(&crc_list.dev_list);
-	spin_lock_init(&crc_list.lock);
-
-	ret = platform_driver_register(&bfin_crypto_crc_driver);
-	if (ret) {
-		pr_err("unable to register driver\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-/**
- *	bfin_crypto_crc_mod_exit - Deinitialize module
- */
-static void __exit bfin_crypto_crc_mod_exit(void)
-{
-	platform_driver_unregister(&bfin_crypto_crc_driver);
-}
-
-module_init(bfin_crypto_crc_mod_init);
-module_exit(bfin_crypto_crc_mod_exit);
-
-MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
-MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/bfin_crc.h b/drivers/crypto/bfin_crc.h
deleted file mode 100644
index 786ef746d109..000000000000
--- a/drivers/crypto/bfin_crc.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * bfin_crc.h - interface to Blackfin CRC controllers
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __BFIN_CRC_H__
-#define __BFIN_CRC_H__
-
-/* Function driver which use hardware crc must initialize the structure */
-struct crc_info {
-	/* Input data address */
-	unsigned char *in_addr;
-	/* Output data address */
-	unsigned char *out_addr;
-	/* Input or output bytes */
-	unsigned long datasize;
-	union {
-	/* CRC to compare with that of input buffer */
-	unsigned long crc_compare;
-	/* Value to compare with input data */
-	unsigned long val_verify;
-	/* Value to fill */
-	unsigned long val_fill;
-	};
-	/* Value to program the 32b CRC Polynomial */
-	unsigned long crc_poly;
-	union {
-	/* CRC calculated from the input data */
-	unsigned long crc_result;
-	/* First failed position to verify input data */
-	unsigned long pos_verify;
-	};
-	/* CRC mirror flags */
-	unsigned int bitmirr:1;
-	unsigned int bytmirr:1;
-	unsigned int w16swp:1;
-	unsigned int fdsel:1;
-	unsigned int rsltmirr:1;
-	unsigned int polymirr:1;
-	unsigned int cmpmirr:1;
-};
-
-/* Userspace interface */
-#define CRC_IOC_MAGIC		'C'
-#define CRC_IOC_CALC_CRC	_IOWR('C', 0x01, unsigned int)
-#define CRC_IOC_MEMCPY_CRC	_IOWR('C', 0x02, unsigned int)
-#define CRC_IOC_VERIFY_VAL	_IOWR('C', 0x03, unsigned int)
-#define CRC_IOC_FILL_VAL	_IOWR('C', 0x04, unsigned int)
-
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-
-struct crc_register {
-	u32 control;
-	u32 datacnt;
-	u32 datacntrld;
-	u32 __pad_1[2];
-	u32 compare;
-	u32 fillval;
-	u32 datafifo;
-	u32 intren;
-	u32 intrenset;
-	u32 intrenclr;
-	u32 poly;
-	u32 __pad_2[4];
-	u32 status;
-	u32 datacntcap;
-	u32 __pad_3;
-	u32 result;
-	u32 curresult;
-	u32 __pad_4[3];
-	u32 revid;
-};
-
-/* CRC_STATUS Masks */
-#define CMPERR			0x00000002	/* Compare error */
-#define DCNTEXP			0x00000010	/* datacnt register expired */
-#define IBR			0x00010000	/* Input buffer ready */
-#define OBR			0x00020000	/* Output buffer ready */
-#define IRR			0x00040000	/* Immediate result readt */
-#define LUTDONE			0x00080000	/* Look-up table generation done */
-#define FSTAT			0x00700000	/* FIFO status */
-#define MAX_FIFO		4		/* Max fifo size */
-
-/* CRC_CONTROL Masks */
-#define BLKEN			0x00000001	/* Block enable */
-#define OPMODE			0x000000F0	/* Operation mode */
-#define OPMODE_OFFSET		4		/* Operation mode mask offset*/
-#define MODE_DMACPY_CRC		1		/* MTM CRC compute and compare */
-#define MODE_DATA_FILL		2		/* MTM data fill */
-#define MODE_CALC_CRC		3		/* MSM CRC compute and compare */
-#define MODE_DATA_VERIFY	4		/* MSM data verify */
-#define AUTOCLRZ		0x00000100	/* Auto clear to zero */
-#define AUTOCLRF		0x00000200	/* Auto clear to one */
-#define OBRSTALL		0x00001000	/* Stall on output buffer ready */
-#define IRRSTALL		0x00002000	/* Stall on immediate result ready */
-#define BITMIRR			0x00010000	/* Mirror bits within each byte of 32-bit input data */
-#define BITMIRR_OFFSET		16		/* Mirror bits offset */
-#define BYTMIRR			0x00020000	/* Mirror bytes of 32-bit input data */
-#define BYTMIRR_OFFSET		17		/* Mirror bytes offset */
-#define W16SWP			0x00040000	/* Mirror uppper and lower 16-bit word of 32-bit input data */
-#define W16SWP_OFFSET		18		/* Mirror 16-bit word offset */
-#define FDSEL			0x00080000	/* FIFO is written after input data is mirrored */
-#define FDSEL_OFFSET		19		/* Mirror FIFO offset */
-#define RSLTMIRR		0x00100000	/* CRC result registers are mirrored. */
-#define RSLTMIRR_OFFSET		20		/* Mirror CRC result offset. */
-#define POLYMIRR		0x00200000	/* CRC poly register is mirrored. */
-#define POLYMIRR_OFFSET		21		/* Mirror CRC poly offset. */
-#define CMPMIRR			0x00400000	/* CRC compare register is mirrored. */
-#define CMPMIRR_OFFSET		22		/* Mirror CRC compare offset. */
-
-/* CRC_INTREN Masks */
-#define CMPERRI 		0x02		/* CRC_ERROR_INTR */
-#define DCNTEXPI 		0x10		/* CRC_STATUS_INTR */
-
-#endif
-
-#endif
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 2188235be02d..7207a535942d 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -328,6 +328,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -349,7 +350,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -366,7 +367,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -387,6 +388,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -408,7 +410,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -425,7 +428,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -447,6 +451,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -468,7 +473,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -485,7 +491,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -563,9 +570,11 @@ static int aead_setkey(struct crypto_aead *aead,
 
 skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
+	memzero_explicit(&keys, sizeof(keys));
 	return aead_set_sh_desc(aead);
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index ceb93fbb76e6..8ae7a1be7dfd 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -625,10 +625,13 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_givencap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1,
 	    *zero_assoc_jump_cmd2;
@@ -650,11 +653,35 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_math_sub_imm_u32(desc, VARSEQOUTLEN, SEQINLEN, IMM,
+					ivsize);
+	} else {
+		append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0,
+				CAAM_CMD_SZ);
+	}
+
 	/* if assoclen + cryptlen is ZERO, skip to ICV write */
-	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
 						 JUMP_COND_MATH_Z);
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -686,8 +713,11 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 
-	/* jump the zero-payload commands */
-	append_jump(desc, JUMP_TEST_ALL | 2);
+	/* jump to ICV writing */
+	if (is_qi)
+		append_jump(desc, JUMP_TEST_ALL | 4);
+	else
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 	/* zero-payload commands */
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
@@ -695,10 +725,18 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
+	if (is_qi)
+		/* jump to ICV writing */
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 	/* There is no input data */
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 |
+				     FIFOLD_TYPE_LAST1);
+
 	/* write ICV */
 	append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
 			 LDST_SRCDST_BYTE_CONTEXT);
@@ -715,10 +753,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1;
 
@@ -739,6 +780,24 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -791,10 +850,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd;
 
@@ -815,7 +877,29 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* Read assoc data */
@@ -823,7 +907,7 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@@ -862,10 +946,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd;
 
@@ -887,7 +974,29 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* Read assoc data */
@@ -895,7 +1004,7 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
@@ -934,10 +1043,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
@@ -958,6 +1070,18 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqinlen */
 	append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
 
@@ -1004,10 +1128,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
@@ -1028,6 +1155,18 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqoutlen */
 	append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
index 5f9445ae2114..a917af5776ce 100644
--- a/drivers/crypto/caam/caamalg_desc.h
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -27,14 +27,20 @@
 #define DESC_GCM_BASE			(3 * CAAM_CMD_SZ)
 #define DESC_GCM_ENC_LEN		(DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
 #define DESC_GCM_DEC_LEN		(DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_ENC_LEN		(DESC_GCM_ENC_LEN + 6 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_DEC_LEN		(DESC_GCM_DEC_LEN + 3 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
 #define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_ENC_LEN		(DESC_RFC4106_ENC_LEN + 5 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_DEC_LEN		(DESC_RFC4106_DEC_LEN + 5 * CAAM_CMD_SZ)
 
 #define DESC_RFC4543_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4543_ENC_LEN		(DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
 #define DESC_RFC4543_DEC_LEN		(DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_ENC_LEN		(DESC_RFC4543_ENC_LEN + 4 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_DEC_LEN		(DESC_RFC4543_DEC_LEN + 4 * CAAM_CMD_SZ)
 
 #define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
@@ -67,22 +73,28 @@ void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
 			       const bool is_qi, int era);
 
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
 				  unsigned int ivsize, const bool is_rfc3686,
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 4aecc9435f69..cacda0831390 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -278,12 +278,317 @@ skip_split_key:
 		}
 	}
 
+	memzero_explicit(&keys, sizeof(keys));
 	return ret;
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
+static int gcm_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	gcm_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int gcm_setkey(struct crypto_aead *aead,
+		      const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
+	ctx->cdata.keylen = keylen;
+
+	ret = gcm_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4106_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4106_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4106_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4106_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4543_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4543_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4543_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4543_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4543_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 			     const u8 *key, unsigned int keylen)
 {
@@ -562,8 +867,18 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
 	qidev = caam_ctx->qidev;
 
 	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
 		caam_jr_strstatus(qidev, status);
-		ecode = -EIO;
+		/*
+		 * verify hw auth check passed else return -EBADMSG
+		 */
+		if (ssrc == JRSTA_SSRC_CCB_ERROR &&
+		    err_id == JRSTA_CCBERR_ERRID_ICVCHK)
+			ecode = -EBADMSG;
+		else
+			ecode = -EIO;
 	}
 
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
@@ -807,6 +1122,22 @@ static int aead_decrypt(struct aead_request *req)
 	return aead_crypt(req, false);
 }
 
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, true);
+}
+
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, false);
+}
+
 static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
 	struct ablkcipher_edesc *edesc;
@@ -1327,6 +1658,61 @@ static struct caam_alg_template driver_algs[] = {
 };
 
 static struct caam_aead_alg driver_aeads[] = {
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4106(gcm(aes))",
+				.cra_driver_name = "rfc4106-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4106_setkey,
+			.setauthsize = rfc4106_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4543(gcm(aes))",
+				.cra_driver_name = "rfc4543-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4543_setkey,
+			.setauthsize = rfc4543_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	/* Galois Counter Mode */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "gcm(aes)",
+				.cra_driver_name = "gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = gcm_setkey,
+			.setauthsize = gcm_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		}
+	},
 	/* single-pass ipsec_esp descriptor */
 	{
 		.aead = {
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index e843cf410373..e4cc636e1104 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -337,7 +337,8 @@ static int caam_remove(struct platform_device *pdev)
 
 	/* shut clocks off before finalizing shutdown */
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 	if (ctrlpriv->caam_emi_slow)
 		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
@@ -466,14 +467,17 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_ipg = clk;
 
-	clk = caam_drv_identify_clk(&pdev->dev, "mem");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_err(&pdev->dev,
-			"can't identify CAAM mem clk: %d\n", ret);
-		return ret;
+	if (!of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
+		clk = caam_drv_identify_clk(&pdev->dev, "mem");
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			dev_err(&pdev->dev,
+				"can't identify CAAM mem clk: %d\n", ret);
+			return ret;
+		}
+		ctrlpriv->caam_mem = clk;
 	}
-	ctrlpriv->caam_mem = clk;
 
 	clk = caam_drv_identify_clk(&pdev->dev, "aclk");
 	if (IS_ERR(clk)) {
@@ -484,7 +488,9 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_aclk = clk;
 
-	if (!of_machine_is_compatible("fsl,imx6ul")) {
+	if (!of_machine_is_compatible("fsl,imx6ul") &&
+	    !of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
 		clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
 		if (IS_ERR(clk)) {
 			ret = PTR_ERR(clk);
@@ -501,11 +507,13 @@ static int caam_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = clk_prepare_enable(ctrlpriv->caam_mem);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
-			ret);
-		goto disable_caam_ipg;
+	if (ctrlpriv->caam_mem) {
+		ret = clk_prepare_enable(ctrlpriv->caam_mem);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
+				ret);
+			goto disable_caam_ipg;
+		}
 	}
 
 	ret = clk_prepare_enable(ctrlpriv->caam_aclk);
@@ -815,9 +823,6 @@ static int caam_probe(struct platform_device *pdev)
 	return 0;
 
 caam_remove:
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(ctrlpriv->dfs_root);
-#endif
 	caam_remove(pdev);
 	return ret;
 
@@ -829,7 +834,8 @@ disable_caam_emi_slow:
 disable_caam_aclk:
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 disable_caam_mem:
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 disable_caam_ipg:
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
 	return ret;
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index f9a44f485aac..b9480828da38 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -579,8 +579,15 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
 
 	fd = &dqrr->fd;
 	status = be32_to_cpu(fd->status);
-	if (unlikely(status))
-		dev_err(qidev, "Error: %#x in CAAM response FD\n", status);
+	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
+		if (ssrc != JRSTA_SSRC_CCB_ERROR ||
+		    err_id != JRSTA_CCBERR_ERRID_ICVCHK)
+			dev_err(qidev, "Error: %#x in CAAM response FD\n",
+				status);
+	}
 
 	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
 		dev_err(qidev, "Non-compound FD from CAAM\n");
diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c
index 34a6d8bf229e..06ad85ab5e86 100644
--- a/drivers/crypto/cavium/cpt/cptpf_main.c
+++ b/drivers/crypto/cavium/cpt/cptpf_main.c
@@ -436,7 +436,7 @@ static int cpt_device_init(struct cpt_device *cpt)
 
 	/* Reset the PF when probed first */
 	cpt_reset(cpt);
-	mdelay(100);
+	msleep(100);
 
 	/*Check BIST status*/
 	bist = (u64)cpt_check_bist_status(cpt);
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 60fc0fa26fd3..26687f318de6 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -46,7 +46,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
 	}
 
 	/* Update result area if supplied */
-	if (req->result)
+	if (req->result && rctx->final)
 		memcpy(req->result, rctx->iv, digest_size);
 
 e_free:
diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
index e6db8672d89c..05850dfd7940 100644
--- a/drivers/crypto/ccp/ccp-crypto-rsa.c
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -60,10 +60,9 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
 
 static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
 {
-	if (ccp_version() > CCP_VERSION(3, 0))
-		return CCP5_RSA_MAXMOD;
-	else
-		return CCP_RSA_MAXMOD;
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	return ctx->u.rsa.n_len;
 }
 
 static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 8b9b16d433f7..871c9628a2ee 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -47,7 +47,7 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
 	}
 
 	/* Update result area if supplied */
-	if (req->result)
+	if (req->result && rctx->final)
 		memcpy(req->result, rctx->ctx, digest_size);
 
 e_free:
diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c
index 59d4ca4e72d8..1a734bd2070a 100644
--- a/drivers/crypto/ccp/ccp-debugfs.c
+++ b/drivers/crypto/ccp/ccp-debugfs.c
@@ -278,7 +278,7 @@ static const struct file_operations ccp_debugfs_stats_ops = {
 };
 
 static struct dentry *ccp_debugfs_dir;
-static DEFINE_RWLOCK(ccp_debugfs_lock);
+static DEFINE_MUTEX(ccp_debugfs_lock);
 
 #define	MAX_NAME_LEN	20
 
@@ -290,16 +290,15 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 	struct dentry *debugfs_stats;
 	struct dentry *debugfs_q_instance;
 	struct dentry *debugfs_q_stats;
-	unsigned long flags;
 	int i;
 
 	if (!debugfs_initialized())
 		return;
 
-	write_lock_irqsave(&ccp_debugfs_lock, flags);
+	mutex_lock(&ccp_debugfs_lock);
 	if (!ccp_debugfs_dir)
 		ccp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
-	write_unlock_irqrestore(&ccp_debugfs_lock, flags);
+	mutex_unlock(&ccp_debugfs_lock);
 	if (!ccp_debugfs_dir)
 		return;
 
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 8b9da58459df..67155cb21636 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -38,7 +38,7 @@ static unsigned int dma_chan_attr = CCP_DMA_DFLT;
 module_param(dma_chan_attr, uint, 0444);
 MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
 
-unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
+static unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
 {
 	switch (dma_chan_attr) {
 	case CCP_DMA_DFLT:
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 406b95329b3d..0ea43cdeb05f 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -178,14 +178,18 @@ static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 	return 0;
 }
 
-static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
-			    struct scatterlist *sg, unsigned int sg_offset,
-			    unsigned int len)
+static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
+			   struct scatterlist *sg, unsigned int sg_offset,
+			   unsigned int len)
 {
 	WARN_ON(!wa->address);
 
+	if (len > (wa->length - wa_offset))
+		return -EINVAL;
+
 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
 				 0);
+	return 0;
 }
 
 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
@@ -205,8 +209,11 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 				   unsigned int len)
 {
 	u8 *p, *q;
+	int	rc;
 
-	ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
+	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
+	if (rc)
+		return rc;
 
 	p = wa->address + wa_offset;
 	q = p + len - 1;
@@ -509,7 +516,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 		return ret;
 
 	dm_offset = CCP_SB_BYTES - aes->key_len;
-	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -528,7 +537,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 		goto e_key;
 
 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
-	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_ctx;
 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -556,8 +567,10 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 				goto e_src;
 			}
 
-			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
-					aes->cmac_key_len);
+			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
+					      aes->cmac_key_len);
+			if (ret)
+				goto e_src;
 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 					     CCP_PASSTHRU_BYTESWAP_256BIT);
 			if (ret) {
@@ -666,7 +679,9 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 		return ret;
 
 	dm_offset = CCP_SB_BYTES - aes->key_len;
-	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -685,7 +700,9 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 		goto e_key;
 
 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
-	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_ctx;
 
 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -777,7 +794,9 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 		goto e_dst;
 	}
 
-	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_dst;
 
 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -820,7 +839,9 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 					   DMA_BIDIRECTIONAL);
 		if (ret)
 			goto e_tag;
-		ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
+		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
+		if (ret)
+			goto e_tag;
 
 		ret = memcmp(tag.address, final_wa.address, AES_BLOCK_SIZE);
 		ccp_dm_free(&tag);
@@ -914,7 +935,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		return ret;
 
 	dm_offset = CCP_SB_BYTES - aes->key_len;
-	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -935,7 +958,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	if (aes->mode != CCP_AES_MODE_ECB) {
 		/* Load the AES context - convert to LE */
 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
-		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+		if (ret)
+			goto e_ctx;
 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 				     CCP_PASSTHRU_BYTESWAP_256BIT);
 		if (ret) {
@@ -1113,8 +1138,12 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 		 * big endian to little endian.
 		 */
 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
-		ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
-		ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+		if (ret)
+			goto e_key;
+		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+		if (ret)
+			goto e_key;
 	} else {
 		/* Version 5 CCPs use a 512-bit space for the key: each portion
 		 * occupies 256 bits, or one entire slot, and is zero-padded.
@@ -1123,9 +1152,13 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 
 		dm_offset = CCP_SB_BYTES;
 		pad = dm_offset - xts->key_len;
-		ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
-		ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len,
-				xts->key_len);
+		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
+		if (ret)
+			goto e_key;
+		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
+				      xts->key_len, xts->key_len);
+		if (ret)
+			goto e_key;
 	}
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -1144,7 +1177,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	if (ret)
 		goto e_key;
 
-	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
+	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
+	if (ret)
+		goto e_ctx;
 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
 			     CCP_PASSTHRU_BYTESWAP_NOOP);
 	if (ret) {
@@ -1287,12 +1322,18 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
 
 	len_singlekey = des3->key_len / 3;
-	ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
-			des3->key, 0, len_singlekey);
-	ccp_set_dm_area(&key, dm_offset + len_singlekey,
-			des3->key, len_singlekey, len_singlekey);
-	ccp_set_dm_area(&key, dm_offset,
-			des3->key, 2 * len_singlekey, len_singlekey);
+	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
+			      des3->key, 0, len_singlekey);
+	if (ret)
+		goto e_key;
+	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
+			      des3->key, len_singlekey, len_singlekey);
+	if (ret)
+		goto e_key;
+	ret = ccp_set_dm_area(&key, dm_offset,
+			      des3->key, 2 * len_singlekey, len_singlekey);
+	if (ret)
+		goto e_key;
 
 	/* Copy the key to the SB */
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
@@ -1320,7 +1361,10 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 		/* Load the context into the LSB */
 		dm_offset = CCP_SB_BYTES - des3->iv_len;
-		ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0, des3->iv_len);
+		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
+				      des3->iv_len);
+		if (ret)
+			goto e_ctx;
 
 		if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
 			load_mode = CCP_PASSTHRU_BYTESWAP_NOOP;
@@ -1604,8 +1648,10 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		}
 	} else {
 		/* Restore the context */
-		ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
-				sb_count * CCP_SB_BYTES);
+		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
+				      sb_count * CCP_SB_BYTES);
+		if (ret)
+			goto e_ctx;
 	}
 
 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
@@ -1927,7 +1973,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
 		if (ret)
 			return ret;
 
-		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
+		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
+		if (ret)
+			goto e_mask;
 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
 				     CCP_PASSTHRU_BYTESWAP_NOOP);
 		if (ret) {
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index b3afb6cc9d72..d95ec526587a 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -367,8 +367,6 @@ e_free:
 
 void *psp_copy_user_blob(u64 __user uaddr, u32 len)
 {
-	void *data;
-
 	if (!uaddr || !len)
 		return ERR_PTR(-EINVAL);
 
@@ -376,18 +374,7 @@ void *psp_copy_user_blob(u64 __user uaddr, u32 len)
 	if (len > SEV_FW_BLOB_MAX_SIZE)
 		return ERR_PTR(-EINVAL);
 
-	data = kmalloc(len, GFP_KERNEL);
-	if (!data)
-		return ERR_PTR(-ENOMEM);
-
-	if (copy_from_user(data, (void __user *)(uintptr_t)uaddr, len))
-		goto e_free;
-
-	return data;
-
-e_free:
-	kfree(data);
-	return ERR_PTR(-EFAULT);
+	return memdup_user((void __user *)(uintptr_t)uaddr, len);
 }
 EXPORT_SYMBOL_GPL(psp_copy_user_blob);
 
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
index eb0da6572720..e0459002eb71 100644
--- a/drivers/crypto/ccp/sp-dev.c
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -252,12 +252,12 @@ struct sp_device *sp_get_psp_master_device(void)
 		goto unlock;
 
 	list_for_each_entry(i, &sp_units, entry) {
-		if (i->psp_data)
+		if (i->psp_data && i->get_psp_master_device) {
+			ret = i->get_psp_master_device();
 			break;
+		}
 	}
 
-	if (i->get_psp_master_device)
-		ret = i->get_psp_master_device();
 unlock:
 	write_unlock_irqrestore(&sp_unit_lock, flags);
 	return ret;
diff --git a/drivers/crypto/ccree/Makefile b/drivers/crypto/ccree/Makefile
new file mode 100644
index 000000000000..bdc27970f95f
--- /dev/null
+++ b/drivers/crypto/ccree/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CRYPTO_DEV_CCREE) := ccree.o
+ccree-y := cc_driver.o cc_buffer_mgr.o cc_request_mgr.o cc_cipher.o cc_hash.o cc_aead.o cc_ivgen.o cc_sram_mgr.o
+ccree-$(CONFIG_CRYPTO_FIPS) += cc_fips.o
+ccree-$(CONFIG_DEBUG_FS) += cc_debugfs.o
+ccree-$(CONFIG_PM) += cc_pm.o
diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
new file mode 100644
index 000000000000..03f4b9fce556
--- /dev/null
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -0,0 +1,2718 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/authenc.h>
+#include <crypto/des.h>
+#include <linux/rtnetlink.h>
+#include "cc_driver.h"
+#include "cc_buffer_mgr.h"
+#include "cc_aead.h"
+#include "cc_request_mgr.h"
+#include "cc_hash.h"
+#include "cc_sram_mgr.h"
+
+#define template_aead	template_u.aead
+
+#define MAX_AEAD_SETKEY_SEQ 12
+#define MAX_AEAD_PROCESS_SEQ 23
+
+#define MAX_HMAC_DIGEST_SIZE (SHA256_DIGEST_SIZE)
+#define MAX_HMAC_BLOCK_SIZE (SHA256_BLOCK_SIZE)
+
+#define AES_CCM_RFC4309_NONCE_SIZE 3
+#define MAX_NONCE_SIZE CTR_RFC3686_NONCE_SIZE
+
+/* Value of each ICV_CMP byte (of 8) in case of success */
+#define ICV_VERIF_OK 0x01
+
+struct cc_aead_handle {
+	cc_sram_addr_t sram_workspace_addr;
+	struct list_head aead_list;
+};
+
+struct cc_hmac_s {
+	u8 *padded_authkey;
+	u8 *ipad_opad; /* IPAD, OPAD*/
+	dma_addr_t padded_authkey_dma_addr;
+	dma_addr_t ipad_opad_dma_addr;
+};
+
+struct cc_xcbc_s {
+	u8 *xcbc_keys; /* K1,K2,K3 */
+	dma_addr_t xcbc_keys_dma_addr;
+};
+
+struct cc_aead_ctx {
+	struct cc_drvdata *drvdata;
+	u8 ctr_nonce[MAX_NONCE_SIZE]; /* used for ctr3686 iv and aes ccm */
+	u8 *enckey;
+	dma_addr_t enckey_dma_addr;
+	union {
+		struct cc_hmac_s hmac;
+		struct cc_xcbc_s xcbc;
+	} auth_state;
+	unsigned int enc_keylen;
+	unsigned int auth_keylen;
+	unsigned int authsize; /* Actual (reduced?) size of the MAC/ICv */
+	enum drv_cipher_mode cipher_mode;
+	enum cc_flow_mode flow_mode;
+	enum drv_hash_mode auth_mode;
+};
+
+static inline bool valid_assoclen(struct aead_request *req)
+{
+	return ((req->assoclen == 16) || (req->assoclen == 20));
+}
+
+static void cc_aead_exit(struct crypto_aead *tfm)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "Clearing context @%p for %s\n", crypto_aead_ctx(tfm),
+		crypto_tfm_alg_name(&tfm->base));
+
+	/* Unmap enckey buffer */
+	if (ctx->enckey) {
+		dma_free_coherent(dev, AES_MAX_KEY_SIZE, ctx->enckey,
+				  ctx->enckey_dma_addr);
+		dev_dbg(dev, "Freed enckey DMA buffer enckey_dma_addr=%pad\n",
+			&ctx->enckey_dma_addr);
+		ctx->enckey_dma_addr = 0;
+		ctx->enckey = NULL;
+	}
+
+	if (ctx->auth_mode == DRV_HASH_XCBC_MAC) { /* XCBC authetication */
+		struct cc_xcbc_s *xcbc = &ctx->auth_state.xcbc;
+
+		if (xcbc->xcbc_keys) {
+			dma_free_coherent(dev, CC_AES_128_BIT_KEY_SIZE * 3,
+					  xcbc->xcbc_keys,
+					  xcbc->xcbc_keys_dma_addr);
+		}
+		dev_dbg(dev, "Freed xcbc_keys DMA buffer xcbc_keys_dma_addr=%pad\n",
+			&xcbc->xcbc_keys_dma_addr);
+		xcbc->xcbc_keys_dma_addr = 0;
+		xcbc->xcbc_keys = NULL;
+	} else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC auth. */
+		struct cc_hmac_s *hmac = &ctx->auth_state.hmac;
+
+		if (hmac->ipad_opad) {
+			dma_free_coherent(dev, 2 * MAX_HMAC_DIGEST_SIZE,
+					  hmac->ipad_opad,
+					  hmac->ipad_opad_dma_addr);
+			dev_dbg(dev, "Freed ipad_opad DMA buffer ipad_opad_dma_addr=%pad\n",
+				&hmac->ipad_opad_dma_addr);
+			hmac->ipad_opad_dma_addr = 0;
+			hmac->ipad_opad = NULL;
+		}
+		if (hmac->padded_authkey) {
+			dma_free_coherent(dev, MAX_HMAC_BLOCK_SIZE,
+					  hmac->padded_authkey,
+					  hmac->padded_authkey_dma_addr);
+			dev_dbg(dev, "Freed padded_authkey DMA buffer padded_authkey_dma_addr=%pad\n",
+				&hmac->padded_authkey_dma_addr);
+			hmac->padded_authkey_dma_addr = 0;
+			hmac->padded_authkey = NULL;
+		}
+	}
+}
+
+static int cc_aead_init(struct crypto_aead *tfm)
+{
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct cc_crypto_alg *cc_alg =
+			container_of(alg, struct cc_crypto_alg, aead_alg);
+	struct device *dev = drvdata_to_dev(cc_alg->drvdata);
+
+	dev_dbg(dev, "Initializing context @%p for %s\n", ctx,
+		crypto_tfm_alg_name(&tfm->base));
+
+	/* Initialize modes in instance */
+	ctx->cipher_mode = cc_alg->cipher_mode;
+	ctx->flow_mode = cc_alg->flow_mode;
+	ctx->auth_mode = cc_alg->auth_mode;
+	ctx->drvdata = cc_alg->drvdata;
+	crypto_aead_set_reqsize(tfm, sizeof(struct aead_req_ctx));
+
+	/* Allocate key buffer, cache line aligned */
+	ctx->enckey = dma_alloc_coherent(dev, AES_MAX_KEY_SIZE,
+					 &ctx->enckey_dma_addr, GFP_KERNEL);
+	if (!ctx->enckey) {
+		dev_err(dev, "Failed allocating key buffer\n");
+		goto init_failed;
+	}
+	dev_dbg(dev, "Allocated enckey buffer in context ctx->enckey=@%p\n",
+		ctx->enckey);
+
+	/* Set default authlen value */
+
+	if (ctx->auth_mode == DRV_HASH_XCBC_MAC) { /* XCBC authetication */
+		struct cc_xcbc_s *xcbc = &ctx->auth_state.xcbc;
+		const unsigned int key_size = CC_AES_128_BIT_KEY_SIZE * 3;
+
+		/* Allocate dma-coherent buffer for XCBC's K1+K2+K3 */
+		/* (and temporary for user key - up to 256b) */
+		xcbc->xcbc_keys = dma_alloc_coherent(dev, key_size,
+						     &xcbc->xcbc_keys_dma_addr,
+						     GFP_KERNEL);
+		if (!xcbc->xcbc_keys) {
+			dev_err(dev, "Failed allocating buffer for XCBC keys\n");
+			goto init_failed;
+		}
+	} else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC authentication */
+		struct cc_hmac_s *hmac = &ctx->auth_state.hmac;
+		const unsigned int digest_size = 2 * MAX_HMAC_DIGEST_SIZE;
+		dma_addr_t *pkey_dma = &hmac->padded_authkey_dma_addr;
+
+		/* Allocate dma-coherent buffer for IPAD + OPAD */
+		hmac->ipad_opad = dma_alloc_coherent(dev, digest_size,
+						     &hmac->ipad_opad_dma_addr,
+						     GFP_KERNEL);
+
+		if (!hmac->ipad_opad) {
+			dev_err(dev, "Failed allocating IPAD/OPAD buffer\n");
+			goto init_failed;
+		}
+
+		dev_dbg(dev, "Allocated authkey buffer in context ctx->authkey=@%p\n",
+			hmac->ipad_opad);
+
+		hmac->padded_authkey = dma_alloc_coherent(dev,
+							  MAX_HMAC_BLOCK_SIZE,
+							  pkey_dma,
+							  GFP_KERNEL);
+
+		if (!hmac->padded_authkey) {
+			dev_err(dev, "failed to allocate padded_authkey\n");
+			goto init_failed;
+		}
+	} else {
+		ctx->auth_state.hmac.ipad_opad = NULL;
+		ctx->auth_state.hmac.padded_authkey = NULL;
+	}
+
+	return 0;
+
+init_failed:
+	cc_aead_exit(tfm);
+	return -ENOMEM;
+}
+
+static void cc_aead_complete(struct device *dev, void *cc_req, int err)
+{
+	struct aead_request *areq = (struct aead_request *)cc_req;
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(cc_req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	cc_unmap_aead_request(dev, areq);
+
+	/* Restore ordinary iv pointer */
+	areq->iv = areq_ctx->backup_iv;
+
+	if (err)
+		goto done;
+
+	if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		if (memcmp(areq_ctx->mac_buf, areq_ctx->icv_virt_addr,
+			   ctx->authsize) != 0) {
+			dev_dbg(dev, "Payload authentication failure, (auth-size=%d, cipher=%d)\n",
+				ctx->authsize, ctx->cipher_mode);
+			/* In case of payload authentication failure, MUST NOT
+			 * revealed the decrypted message --> zero its memory.
+			 */
+			cc_zero_sgl(areq->dst, areq_ctx->cryptlen);
+			err = -EBADMSG;
+		}
+	} else { /*ENCRYPT*/
+		if (areq_ctx->is_icv_fragmented) {
+			u32 skip = areq->cryptlen + areq_ctx->dst_offset;
+
+			cc_copy_sg_portion(dev, areq_ctx->mac_buf,
+					   areq_ctx->dst_sgl, skip,
+					   (skip + ctx->authsize),
+					   CC_SG_FROM_BUF);
+		}
+
+		/* If an IV was generated, copy it back to the user provided
+		 * buffer.
+		 */
+		if (areq_ctx->backup_giv) {
+			if (ctx->cipher_mode == DRV_CIPHER_CTR)
+				memcpy(areq_ctx->backup_giv, areq_ctx->ctr_iv +
+				       CTR_RFC3686_NONCE_SIZE,
+				       CTR_RFC3686_IV_SIZE);
+			else if (ctx->cipher_mode == DRV_CIPHER_CCM)
+				memcpy(areq_ctx->backup_giv, areq_ctx->ctr_iv +
+				       CCM_BLOCK_IV_OFFSET, CCM_BLOCK_IV_SIZE);
+		}
+	}
+done:
+	aead_request_complete(areq, err);
+}
+
+static unsigned int xcbc_setkey(struct cc_hw_desc *desc,
+				struct cc_aead_ctx *ctx)
+{
+	/* Load the AES key */
+	hw_desc_init(&desc[0]);
+	/* We are using for the source/user key the same buffer
+	 * as for the output keys, * because after this key loading it
+	 * is not needed anymore
+	 */
+	set_din_type(&desc[0], DMA_DLLI,
+		     ctx->auth_state.xcbc.xcbc_keys_dma_addr, ctx->auth_keylen,
+		     NS_BIT);
+	set_cipher_mode(&desc[0], DRV_CIPHER_ECB);
+	set_cipher_config0(&desc[0], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_key_size_aes(&desc[0], ctx->auth_keylen);
+	set_flow_mode(&desc[0], S_DIN_to_AES);
+	set_setup_mode(&desc[0], SETUP_LOAD_KEY0);
+
+	hw_desc_init(&desc[1]);
+	set_din_const(&desc[1], 0x01010101, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[1], DIN_AES_DOUT);
+	set_dout_dlli(&desc[1], ctx->auth_state.xcbc.xcbc_keys_dma_addr,
+		      AES_KEYSIZE_128, NS_BIT, 0);
+
+	hw_desc_init(&desc[2]);
+	set_din_const(&desc[2], 0x02020202, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[2], DIN_AES_DOUT);
+	set_dout_dlli(&desc[2], (ctx->auth_state.xcbc.xcbc_keys_dma_addr
+					 + AES_KEYSIZE_128),
+			      AES_KEYSIZE_128, NS_BIT, 0);
+
+	hw_desc_init(&desc[3]);
+	set_din_const(&desc[3], 0x03030303, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[3], DIN_AES_DOUT);
+	set_dout_dlli(&desc[3], (ctx->auth_state.xcbc.xcbc_keys_dma_addr
+					  + 2 * AES_KEYSIZE_128),
+			      AES_KEYSIZE_128, NS_BIT, 0);
+
+	return 4;
+}
+
+static int hmac_setkey(struct cc_hw_desc *desc, struct cc_aead_ctx *ctx)
+{
+	unsigned int hmac_pad_const[2] = { HMAC_IPAD_CONST, HMAC_OPAD_CONST };
+	unsigned int digest_ofs = 0;
+	unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ?
+			DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256;
+	unsigned int digest_size = (ctx->auth_mode == DRV_HASH_SHA1) ?
+			CC_SHA1_DIGEST_SIZE : CC_SHA256_DIGEST_SIZE;
+	struct cc_hmac_s *hmac = &ctx->auth_state.hmac;
+
+	unsigned int idx = 0;
+	int i;
+
+	/* calc derived HMAC key */
+	for (i = 0; i < 2; i++) {
+		/* Load hash initial state */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], hash_mode);
+		set_din_sram(&desc[idx],
+			     cc_larval_digest_addr(ctx->drvdata,
+						   ctx->auth_mode),
+			     digest_size);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+		idx++;
+
+		/* Load the hash current length*/
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], hash_mode);
+		set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+		idx++;
+
+		/* Prepare ipad key */
+		hw_desc_init(&desc[idx]);
+		set_xor_val(&desc[idx], hmac_pad_const[i]);
+		set_cipher_mode(&desc[idx], hash_mode);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+		idx++;
+
+		/* Perform HASH update */
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     hmac->padded_authkey_dma_addr,
+			     SHA256_BLOCK_SIZE, NS_BIT);
+		set_cipher_mode(&desc[idx], hash_mode);
+		set_xor_active(&desc[idx]);
+		set_flow_mode(&desc[idx], DIN_HASH);
+		idx++;
+
+		/* Get the digset */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], hash_mode);
+		set_dout_dlli(&desc[idx],
+			      (hmac->ipad_opad_dma_addr + digest_ofs),
+			      digest_size, NS_BIT, 0);
+		set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+		set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+		set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+		idx++;
+
+		digest_ofs += digest_size;
+	}
+
+	return idx;
+}
+
+static int validate_keys_sizes(struct cc_aead_ctx *ctx)
+{
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "enc_keylen=%u  authkeylen=%u\n",
+		ctx->enc_keylen, ctx->auth_keylen);
+
+	switch (ctx->auth_mode) {
+	case DRV_HASH_SHA1:
+	case DRV_HASH_SHA256:
+		break;
+	case DRV_HASH_XCBC_MAC:
+		if (ctx->auth_keylen != AES_KEYSIZE_128 &&
+		    ctx->auth_keylen != AES_KEYSIZE_192 &&
+		    ctx->auth_keylen != AES_KEYSIZE_256)
+			return -ENOTSUPP;
+		break;
+	case DRV_HASH_NULL: /* Not authenc (e.g., CCM) - no auth_key) */
+		if (ctx->auth_keylen > 0)
+			return -EINVAL;
+		break;
+	default:
+		dev_err(dev, "Invalid auth_mode=%d\n", ctx->auth_mode);
+		return -EINVAL;
+	}
+	/* Check cipher key size */
+	if (ctx->flow_mode == S_DIN_to_DES) {
+		if (ctx->enc_keylen != DES3_EDE_KEY_SIZE) {
+			dev_err(dev, "Invalid cipher(3DES) key size: %u\n",
+				ctx->enc_keylen);
+			return -EINVAL;
+		}
+	} else { /* Default assumed to be AES ciphers */
+		if (ctx->enc_keylen != AES_KEYSIZE_128 &&
+		    ctx->enc_keylen != AES_KEYSIZE_192 &&
+		    ctx->enc_keylen != AES_KEYSIZE_256) {
+			dev_err(dev, "Invalid cipher(AES) key size: %u\n",
+				ctx->enc_keylen);
+			return -EINVAL;
+		}
+	}
+
+	return 0; /* All tests of keys sizes passed */
+}
+
+/* This function prepers the user key so it can pass to the hmac processing
+ * (copy to intenral buffer or hash in case of key longer than block
+ */
+static int cc_get_plain_hmac_key(struct crypto_aead *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	dma_addr_t key_dma_addr = 0;
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	u32 larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->auth_mode);
+	struct cc_crypto_req cc_req = {};
+	unsigned int blocksize;
+	unsigned int digestsize;
+	unsigned int hashmode;
+	unsigned int idx = 0;
+	int rc = 0;
+	struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ];
+	dma_addr_t padded_authkey_dma_addr =
+		ctx->auth_state.hmac.padded_authkey_dma_addr;
+
+	switch (ctx->auth_mode) { /* auth_key required and >0 */
+	case DRV_HASH_SHA1:
+		blocksize = SHA1_BLOCK_SIZE;
+		digestsize = SHA1_DIGEST_SIZE;
+		hashmode = DRV_HASH_HW_SHA1;
+		break;
+	case DRV_HASH_SHA256:
+	default:
+		blocksize = SHA256_BLOCK_SIZE;
+		digestsize = SHA256_DIGEST_SIZE;
+		hashmode = DRV_HASH_HW_SHA256;
+	}
+
+	if (keylen != 0) {
+		key_dma_addr = dma_map_single(dev, (void *)key, keylen,
+					      DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, key_dma_addr)) {
+			dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n",
+				key, keylen);
+			return -ENOMEM;
+		}
+		if (keylen > blocksize) {
+			/* Load hash initial state */
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], hashmode);
+			set_din_sram(&desc[idx], larval_addr, digestsize);
+			set_flow_mode(&desc[idx], S_DIN_to_HASH);
+			set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+			idx++;
+
+			/* Load the hash current length*/
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], hashmode);
+			set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz);
+			set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+			set_flow_mode(&desc[idx], S_DIN_to_HASH);
+			set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+			idx++;
+
+			hw_desc_init(&desc[idx]);
+			set_din_type(&desc[idx], DMA_DLLI,
+				     key_dma_addr, keylen, NS_BIT);
+			set_flow_mode(&desc[idx], DIN_HASH);
+			idx++;
+
+			/* Get hashed key */
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], hashmode);
+			set_dout_dlli(&desc[idx], padded_authkey_dma_addr,
+				      digestsize, NS_BIT, 0);
+			set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+			set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+			set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+			set_cipher_config0(&desc[idx],
+					   HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+			idx++;
+
+			hw_desc_init(&desc[idx]);
+			set_din_const(&desc[idx], 0, (blocksize - digestsize));
+			set_flow_mode(&desc[idx], BYPASS);
+			set_dout_dlli(&desc[idx], (padded_authkey_dma_addr +
+				      digestsize), (blocksize - digestsize),
+				      NS_BIT, 0);
+			idx++;
+		} else {
+			hw_desc_init(&desc[idx]);
+			set_din_type(&desc[idx], DMA_DLLI, key_dma_addr,
+				     keylen, NS_BIT);
+			set_flow_mode(&desc[idx], BYPASS);
+			set_dout_dlli(&desc[idx], padded_authkey_dma_addr,
+				      keylen, NS_BIT, 0);
+			idx++;
+
+			if ((blocksize - keylen) != 0) {
+				hw_desc_init(&desc[idx]);
+				set_din_const(&desc[idx], 0,
+					      (blocksize - keylen));
+				set_flow_mode(&desc[idx], BYPASS);
+				set_dout_dlli(&desc[idx],
+					      (padded_authkey_dma_addr +
+					       keylen),
+					      (blocksize - keylen), NS_BIT, 0);
+				idx++;
+			}
+		}
+	} else {
+		hw_desc_init(&desc[idx]);
+		set_din_const(&desc[idx], 0, (blocksize - keylen));
+		set_flow_mode(&desc[idx], BYPASS);
+		set_dout_dlli(&desc[idx], padded_authkey_dma_addr,
+			      blocksize, NS_BIT, 0);
+		idx++;
+	}
+
+	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
+	if (rc)
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+
+	if (key_dma_addr)
+		dma_unmap_single(dev, key_dma_addr, keylen, DMA_TO_DEVICE);
+
+	return rc;
+}
+
+static int cc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct rtattr *rta = (struct rtattr *)key;
+	struct cc_crypto_req cc_req = {};
+	struct crypto_authenc_key_param *param;
+	struct cc_hw_desc desc[MAX_AEAD_SETKEY_SEQ];
+	int rc = -EINVAL;
+	unsigned int seq_len = 0;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "Setting key in context @%p for %s. key=%p keylen=%u\n",
+		ctx, crypto_tfm_alg_name(crypto_aead_tfm(tfm)), key, keylen);
+
+	/* STAT_PHASE_0: Init and sanity checks */
+
+	if (ctx->auth_mode != DRV_HASH_NULL) { /* authenc() alg. */
+		if (!RTA_OK(rta, keylen))
+			goto badkey;
+		if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+			goto badkey;
+		if (RTA_PAYLOAD(rta) < sizeof(*param))
+			goto badkey;
+		param = RTA_DATA(rta);
+		ctx->enc_keylen = be32_to_cpu(param->enckeylen);
+		key += RTA_ALIGN(rta->rta_len);
+		keylen -= RTA_ALIGN(rta->rta_len);
+		if (keylen < ctx->enc_keylen)
+			goto badkey;
+		ctx->auth_keylen = keylen - ctx->enc_keylen;
+
+		if (ctx->cipher_mode == DRV_CIPHER_CTR) {
+			/* the nonce is stored in bytes at end of key */
+			if (ctx->enc_keylen <
+			    (AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE))
+				goto badkey;
+			/* Copy nonce from last 4 bytes in CTR key to
+			 *  first 4 bytes in CTR IV
+			 */
+			memcpy(ctx->ctr_nonce, key + ctx->auth_keylen +
+			       ctx->enc_keylen - CTR_RFC3686_NONCE_SIZE,
+			       CTR_RFC3686_NONCE_SIZE);
+			/* Set CTR key size */
+			ctx->enc_keylen -= CTR_RFC3686_NONCE_SIZE;
+		}
+	} else { /* non-authenc - has just one key */
+		ctx->enc_keylen = keylen;
+		ctx->auth_keylen = 0;
+	}
+
+	rc = validate_keys_sizes(ctx);
+	if (rc)
+		goto badkey;
+
+	/* STAT_PHASE_1: Copy key to ctx */
+
+	/* Get key material */
+	memcpy(ctx->enckey, key + ctx->auth_keylen, ctx->enc_keylen);
+	if (ctx->enc_keylen == 24)
+		memset(ctx->enckey + 24, 0, CC_AES_KEY_SIZE_MAX - 24);
+	if (ctx->auth_mode == DRV_HASH_XCBC_MAC) {
+		memcpy(ctx->auth_state.xcbc.xcbc_keys, key, ctx->auth_keylen);
+	} else if (ctx->auth_mode != DRV_HASH_NULL) { /* HMAC */
+		rc = cc_get_plain_hmac_key(tfm, key, ctx->auth_keylen);
+		if (rc)
+			goto badkey;
+	}
+
+	/* STAT_PHASE_2: Create sequence */
+
+	switch (ctx->auth_mode) {
+	case DRV_HASH_SHA1:
+	case DRV_HASH_SHA256:
+		seq_len = hmac_setkey(desc, ctx);
+		break;
+	case DRV_HASH_XCBC_MAC:
+		seq_len = xcbc_setkey(desc, ctx);
+		break;
+	case DRV_HASH_NULL: /* non-authenc modes, e.g., CCM */
+		break; /* No auth. key setup */
+	default:
+		dev_err(dev, "Unsupported authenc (%d)\n", ctx->auth_mode);
+		rc = -ENOTSUPP;
+		goto badkey;
+	}
+
+	/* STAT_PHASE_3: Submit sequence to HW */
+
+	if (seq_len > 0) { /* For CCM there is no sequence to setup the key */
+		rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, seq_len);
+		if (rc) {
+			dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+			goto setkey_error;
+		}
+	}
+
+	/* Update STAT_PHASE_3 */
+	return rc;
+
+badkey:
+	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+setkey_error:
+	return rc;
+}
+
+static int cc_rfc4309_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen < 3)
+		return -EINVAL;
+
+	keylen -= 3;
+	memcpy(ctx->ctr_nonce, key + keylen, 3);
+
+	return cc_aead_setkey(tfm, key, keylen);
+}
+
+static int cc_aead_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(authenc);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	/* Unsupported auth. sizes */
+	if (authsize == 0 ||
+	    authsize > crypto_aead_maxauthsize(authenc)) {
+		return -ENOTSUPP;
+	}
+
+	ctx->authsize = authsize;
+	dev_dbg(dev, "authlen=%d\n", ctx->authsize);
+
+	return 0;
+}
+
+static int cc_rfc4309_ccm_setauthsize(struct crypto_aead *authenc,
+				      unsigned int authsize)
+{
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return cc_aead_setauthsize(authenc, authsize);
+}
+
+static int cc_ccm_setauthsize(struct crypto_aead *authenc,
+			      unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 6:
+	case 8:
+	case 10:
+	case 12:
+	case 14:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return cc_aead_setauthsize(authenc, authsize);
+}
+
+static void cc_set_assoc_desc(struct aead_request *areq, unsigned int flow_mode,
+			      struct cc_hw_desc desc[], unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	enum cc_req_dma_buf_type assoc_dma_type = areq_ctx->assoc_buff_type;
+	unsigned int idx = *seq_size;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	switch (assoc_dma_type) {
+	case CC_DMA_BUF_DLLI:
+		dev_dbg(dev, "ASSOC buffer type DLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI, sg_dma_address(areq->src),
+			     areq->assoclen, NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		if (ctx->auth_mode == DRV_HASH_XCBC_MAC &&
+		    areq_ctx->cryptlen > 0)
+			set_din_not_last_indication(&desc[idx]);
+		break;
+	case CC_DMA_BUF_MLLI:
+		dev_dbg(dev, "ASSOC buffer type MLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_MLLI, areq_ctx->assoc.sram_addr,
+			     areq_ctx->assoc.mlli_nents, NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		if (ctx->auth_mode == DRV_HASH_XCBC_MAC &&
+		    areq_ctx->cryptlen > 0)
+			set_din_not_last_indication(&desc[idx]);
+		break;
+	case CC_DMA_BUF_NULL:
+	default:
+		dev_err(dev, "Invalid ASSOC buffer type\n");
+	}
+
+	*seq_size = (++idx);
+}
+
+static void cc_proc_authen_desc(struct aead_request *areq,
+				unsigned int flow_mode,
+				struct cc_hw_desc desc[],
+				unsigned int *seq_size, int direct)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type;
+	unsigned int idx = *seq_size;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	switch (data_dma_type) {
+	case CC_DMA_BUF_DLLI:
+	{
+		struct scatterlist *cipher =
+			(direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+			areq_ctx->dst_sgl : areq_ctx->src_sgl;
+
+		unsigned int offset =
+			(direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+			areq_ctx->dst_offset : areq_ctx->src_offset;
+		dev_dbg(dev, "AUTHENC: SRC/DST buffer type DLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     (sg_dma_address(cipher) + offset),
+			     areq_ctx->cryptlen, NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		break;
+	}
+	case CC_DMA_BUF_MLLI:
+	{
+		/* DOUBLE-PASS flow (as default)
+		 * assoc. + iv + data -compact in one table
+		 * if assoclen is ZERO only IV perform
+		 */
+		cc_sram_addr_t mlli_addr = areq_ctx->assoc.sram_addr;
+		u32 mlli_nents = areq_ctx->assoc.mlli_nents;
+
+		if (areq_ctx->is_single_pass) {
+			if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+				mlli_addr = areq_ctx->dst.sram_addr;
+				mlli_nents = areq_ctx->dst.mlli_nents;
+			} else {
+				mlli_addr = areq_ctx->src.sram_addr;
+				mlli_nents = areq_ctx->src.mlli_nents;
+			}
+		}
+
+		dev_dbg(dev, "AUTHENC: SRC/DST buffer type MLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_MLLI, mlli_addr, mlli_nents,
+			     NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		break;
+	}
+	case CC_DMA_BUF_NULL:
+	default:
+		dev_err(dev, "AUTHENC: Invalid SRC/DST buffer type\n");
+	}
+
+	*seq_size = (++idx);
+}
+
+static void cc_proc_cipher_desc(struct aead_request *areq,
+				unsigned int flow_mode,
+				struct cc_hw_desc desc[],
+				unsigned int *seq_size)
+{
+	unsigned int idx = *seq_size;
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	if (areq_ctx->cryptlen == 0)
+		return; /*null processing*/
+
+	switch (data_dma_type) {
+	case CC_DMA_BUF_DLLI:
+		dev_dbg(dev, "CIPHER: SRC/DST buffer type DLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     (sg_dma_address(areq_ctx->src_sgl) +
+			      areq_ctx->src_offset), areq_ctx->cryptlen,
+			      NS_BIT);
+		set_dout_dlli(&desc[idx],
+			      (sg_dma_address(areq_ctx->dst_sgl) +
+			       areq_ctx->dst_offset),
+			      areq_ctx->cryptlen, NS_BIT, 0);
+		set_flow_mode(&desc[idx], flow_mode);
+		break;
+	case CC_DMA_BUF_MLLI:
+		dev_dbg(dev, "CIPHER: SRC/DST buffer type MLLI\n");
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_MLLI, areq_ctx->src.sram_addr,
+			     areq_ctx->src.mlli_nents, NS_BIT);
+		set_dout_mlli(&desc[idx], areq_ctx->dst.sram_addr,
+			      areq_ctx->dst.mlli_nents, NS_BIT, 0);
+		set_flow_mode(&desc[idx], flow_mode);
+		break;
+	case CC_DMA_BUF_NULL:
+	default:
+		dev_err(dev, "CIPHER: Invalid SRC/DST buffer type\n");
+	}
+
+	*seq_size = (++idx);
+}
+
+static void cc_proc_digest_desc(struct aead_request *req,
+				struct cc_hw_desc desc[],
+				unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int idx = *seq_size;
+	unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ?
+				DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256;
+	int direct = req_ctx->gen_ctx.op_type;
+
+	/* Get final ICV result */
+	if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+		hw_desc_init(&desc[idx]);
+		set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+		set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+		set_dout_dlli(&desc[idx], req_ctx->icv_dma_addr, ctx->authsize,
+			      NS_BIT, 1);
+		set_queue_last_ind(ctx->drvdata, &desc[idx]);
+		if (ctx->auth_mode == DRV_HASH_XCBC_MAC) {
+			set_aes_not_hash_mode(&desc[idx]);
+			set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+		} else {
+			set_cipher_config0(&desc[idx],
+					   HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+			set_cipher_mode(&desc[idx], hash_mode);
+		}
+	} else { /*Decrypt*/
+		/* Get ICV out from hardware */
+		hw_desc_init(&desc[idx]);
+		set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+		set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+		set_dout_dlli(&desc[idx], req_ctx->mac_buf_dma_addr,
+			      ctx->authsize, NS_BIT, 1);
+		set_queue_last_ind(ctx->drvdata, &desc[idx]);
+		set_cipher_config0(&desc[idx],
+				   HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+		set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+		if (ctx->auth_mode == DRV_HASH_XCBC_MAC) {
+			set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+			set_aes_not_hash_mode(&desc[idx]);
+		} else {
+			set_cipher_mode(&desc[idx], hash_mode);
+		}
+	}
+
+	*seq_size = (++idx);
+}
+
+static void cc_set_cipher_desc(struct aead_request *req,
+			       struct cc_hw_desc desc[],
+			       unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int hw_iv_size = req_ctx->hw_iv_size;
+	unsigned int idx = *seq_size;
+	int direct = req_ctx->gen_ctx.op_type;
+
+	/* Setup cipher state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_config0(&desc[idx], direct);
+	set_flow_mode(&desc[idx], ctx->flow_mode);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->gen_ctx.iv_dma_addr,
+		     hw_iv_size, NS_BIT);
+	if (ctx->cipher_mode == DRV_CIPHER_CTR)
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	else
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	set_cipher_mode(&desc[idx], ctx->cipher_mode);
+	idx++;
+
+	/* Setup enc. key */
+	hw_desc_init(&desc[idx]);
+	set_cipher_config0(&desc[idx], direct);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_flow_mode(&desc[idx], ctx->flow_mode);
+	if (ctx->flow_mode == S_DIN_to_AES) {
+		set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+			     ((ctx->enc_keylen == 24) ? CC_AES_KEY_SIZE_MAX :
+			      ctx->enc_keylen), NS_BIT);
+		set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	} else {
+		set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+			     ctx->enc_keylen, NS_BIT);
+		set_key_size_des(&desc[idx], ctx->enc_keylen);
+	}
+	set_cipher_mode(&desc[idx], ctx->cipher_mode);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static void cc_proc_cipher(struct aead_request *req, struct cc_hw_desc desc[],
+			   unsigned int *seq_size, unsigned int data_flow_mode)
+{
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	int direct = req_ctx->gen_ctx.op_type;
+	unsigned int idx = *seq_size;
+
+	if (req_ctx->cryptlen == 0)
+		return; /*null processing*/
+
+	cc_set_cipher_desc(req, desc, &idx);
+	cc_proc_cipher_desc(req, data_flow_mode, desc, &idx);
+	if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+		/* We must wait for DMA to write all cipher */
+		hw_desc_init(&desc[idx]);
+		set_din_no_dma(&desc[idx], 0, 0xfffff0);
+		set_dout_no_dma(&desc[idx], 0, 0, 1);
+		idx++;
+	}
+
+	*seq_size = idx;
+}
+
+static void cc_set_hmac_desc(struct aead_request *req, struct cc_hw_desc desc[],
+			     unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ?
+				DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256;
+	unsigned int digest_size = (ctx->auth_mode == DRV_HASH_SHA1) ?
+				CC_SHA1_DIGEST_SIZE : CC_SHA256_DIGEST_SIZE;
+	unsigned int idx = *seq_size;
+
+	/* Loading hash ipad xor key state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], hash_mode);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     ctx->auth_state.hmac.ipad_opad_dma_addr, digest_size,
+		     NS_BIT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	/* Load init. digest len (64 bytes) */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], hash_mode);
+	set_din_sram(&desc[idx], cc_digest_len_addr(ctx->drvdata, hash_mode),
+		     ctx->drvdata->hash_len_sz);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static void cc_set_xcbc_desc(struct aead_request *req, struct cc_hw_desc desc[],
+			     unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int idx = *seq_size;
+
+	/* Loading MAC state */
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0, CC_AES_BLOCK_SIZE);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* Setup XCBC MAC K1 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     ctx->auth_state.xcbc.xcbc_keys_dma_addr,
+		     AES_KEYSIZE_128, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* Setup XCBC MAC K2 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     (ctx->auth_state.xcbc.xcbc_keys_dma_addr +
+		      AES_KEYSIZE_128), AES_KEYSIZE_128, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* Setup XCBC MAC K3 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     (ctx->auth_state.xcbc.xcbc_keys_dma_addr +
+		      2 * AES_KEYSIZE_128), AES_KEYSIZE_128, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE2);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static void cc_proc_header_desc(struct aead_request *req,
+				struct cc_hw_desc desc[],
+				unsigned int *seq_size)
+{
+	unsigned int idx = *seq_size;
+	/* Hash associated data */
+	if (req->assoclen > 0)
+		cc_set_assoc_desc(req, DIN_HASH, desc, &idx);
+
+	/* Hash IV */
+	*seq_size = idx;
+}
+
+static void cc_proc_scheme_desc(struct aead_request *req,
+				struct cc_hw_desc desc[],
+				unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct cc_aead_handle *aead_handle = ctx->drvdata->aead_handle;
+	unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ?
+				DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256;
+	unsigned int digest_size = (ctx->auth_mode == DRV_HASH_SHA1) ?
+				CC_SHA1_DIGEST_SIZE : CC_SHA256_DIGEST_SIZE;
+	unsigned int idx = *seq_size;
+
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], hash_mode);
+	set_dout_sram(&desc[idx], aead_handle->sram_workspace_addr,
+		      ctx->drvdata->hash_len_sz);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE1);
+	set_cipher_do(&desc[idx], DO_PAD);
+	idx++;
+
+	/* Get final ICV result */
+	hw_desc_init(&desc[idx]);
+	set_dout_sram(&desc[idx], aead_handle->sram_workspace_addr,
+		      digest_size);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_config0(&desc[idx], HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+	set_cipher_mode(&desc[idx], hash_mode);
+	idx++;
+
+	/* Loading hash opad xor key state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], hash_mode);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     (ctx->auth_state.hmac.ipad_opad_dma_addr + digest_size),
+		     digest_size, NS_BIT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	/* Load init. digest len (64 bytes) */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], hash_mode);
+	set_din_sram(&desc[idx], cc_digest_len_addr(ctx->drvdata, hash_mode),
+		     ctx->drvdata->hash_len_sz);
+	set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	/* Perform HASH update */
+	hw_desc_init(&desc[idx]);
+	set_din_sram(&desc[idx], aead_handle->sram_workspace_addr,
+		     digest_size);
+	set_flow_mode(&desc[idx], DIN_HASH);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static void cc_mlli_to_sram(struct aead_request *req,
+			    struct cc_hw_desc desc[], unsigned int *seq_size)
+{
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	if (req_ctx->assoc_buff_type == CC_DMA_BUF_MLLI ||
+	    req_ctx->data_buff_type == CC_DMA_BUF_MLLI ||
+	    !req_ctx->is_single_pass) {
+		dev_dbg(dev, "Copy-to-sram: mlli_dma=%08x, mlli_size=%u\n",
+			(unsigned int)ctx->drvdata->mlli_sram_addr,
+			req_ctx->mlli_params.mlli_len);
+		/* Copy MLLI table host-to-sram */
+		hw_desc_init(&desc[*seq_size]);
+		set_din_type(&desc[*seq_size], DMA_DLLI,
+			     req_ctx->mlli_params.mlli_dma_addr,
+			     req_ctx->mlli_params.mlli_len, NS_BIT);
+		set_dout_sram(&desc[*seq_size],
+			      ctx->drvdata->mlli_sram_addr,
+			      req_ctx->mlli_params.mlli_len);
+		set_flow_mode(&desc[*seq_size], BYPASS);
+		(*seq_size)++;
+	}
+}
+
+static enum cc_flow_mode cc_get_data_flow(enum drv_crypto_direction direct,
+					  enum cc_flow_mode setup_flow_mode,
+					  bool is_single_pass)
+{
+	enum cc_flow_mode data_flow_mode;
+
+	if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+		if (setup_flow_mode == S_DIN_to_AES)
+			data_flow_mode = is_single_pass ?
+				AES_to_HASH_and_DOUT : DIN_AES_DOUT;
+		else
+			data_flow_mode = is_single_pass ?
+				DES_to_HASH_and_DOUT : DIN_DES_DOUT;
+	} else { /* Decrypt */
+		if (setup_flow_mode == S_DIN_to_AES)
+			data_flow_mode = is_single_pass ?
+				AES_and_HASH : DIN_AES_DOUT;
+		else
+			data_flow_mode = is_single_pass ?
+				DES_and_HASH : DIN_DES_DOUT;
+	}
+
+	return data_flow_mode;
+}
+
+static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[],
+			    unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	int direct = req_ctx->gen_ctx.op_type;
+	unsigned int data_flow_mode =
+		cc_get_data_flow(direct, ctx->flow_mode,
+				 req_ctx->is_single_pass);
+
+	if (req_ctx->is_single_pass) {
+		/**
+		 * Single-pass flow
+		 */
+		cc_set_hmac_desc(req, desc, seq_size);
+		cc_set_cipher_desc(req, desc, seq_size);
+		cc_proc_header_desc(req, desc, seq_size);
+		cc_proc_cipher_desc(req, data_flow_mode, desc, seq_size);
+		cc_proc_scheme_desc(req, desc, seq_size);
+		cc_proc_digest_desc(req, desc, seq_size);
+		return;
+	}
+
+	/**
+	 * Double-pass flow
+	 * Fallback for unsupported single-pass modes,
+	 * i.e. using assoc. data of non-word-multiple
+	 */
+	if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+		/* encrypt first.. */
+		cc_proc_cipher(req, desc, seq_size, data_flow_mode);
+		/* authenc after..*/
+		cc_set_hmac_desc(req, desc, seq_size);
+		cc_proc_authen_desc(req, DIN_HASH, desc, seq_size, direct);
+		cc_proc_scheme_desc(req, desc, seq_size);
+		cc_proc_digest_desc(req, desc, seq_size);
+
+	} else { /*DECRYPT*/
+		/* authenc first..*/
+		cc_set_hmac_desc(req, desc, seq_size);
+		cc_proc_authen_desc(req, DIN_HASH, desc, seq_size, direct);
+		cc_proc_scheme_desc(req, desc, seq_size);
+		/* decrypt after.. */
+		cc_proc_cipher(req, desc, seq_size, data_flow_mode);
+		/* read the digest result with setting the completion bit
+		 * must be after the cipher operation
+		 */
+		cc_proc_digest_desc(req, desc, seq_size);
+	}
+}
+
+static void
+cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[],
+		unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	int direct = req_ctx->gen_ctx.op_type;
+	unsigned int data_flow_mode =
+		cc_get_data_flow(direct, ctx->flow_mode,
+				 req_ctx->is_single_pass);
+
+	if (req_ctx->is_single_pass) {
+		/**
+		 * Single-pass flow
+		 */
+		cc_set_xcbc_desc(req, desc, seq_size);
+		cc_set_cipher_desc(req, desc, seq_size);
+		cc_proc_header_desc(req, desc, seq_size);
+		cc_proc_cipher_desc(req, data_flow_mode, desc, seq_size);
+		cc_proc_digest_desc(req, desc, seq_size);
+		return;
+	}
+
+	/**
+	 * Double-pass flow
+	 * Fallback for unsupported single-pass modes,
+	 * i.e. using assoc. data of non-word-multiple
+	 */
+	if (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) {
+		/* encrypt first.. */
+		cc_proc_cipher(req, desc, seq_size, data_flow_mode);
+		/* authenc after.. */
+		cc_set_xcbc_desc(req, desc, seq_size);
+		cc_proc_authen_desc(req, DIN_HASH, desc, seq_size, direct);
+		cc_proc_digest_desc(req, desc, seq_size);
+	} else { /*DECRYPT*/
+		/* authenc first.. */
+		cc_set_xcbc_desc(req, desc, seq_size);
+		cc_proc_authen_desc(req, DIN_HASH, desc, seq_size, direct);
+		/* decrypt after..*/
+		cc_proc_cipher(req, desc, seq_size, data_flow_mode);
+		/* read the digest result with setting the completion bit
+		 * must be after the cipher operation
+		 */
+		cc_proc_digest_desc(req, desc, seq_size);
+	}
+}
+
+static int validate_data_size(struct cc_aead_ctx *ctx,
+			      enum drv_crypto_direction direct,
+			      struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	unsigned int assoclen = req->assoclen;
+	unsigned int cipherlen = (direct == DRV_CRYPTO_DIRECTION_DECRYPT) ?
+			(req->cryptlen - ctx->authsize) : req->cryptlen;
+
+	if (direct == DRV_CRYPTO_DIRECTION_DECRYPT &&
+	    req->cryptlen < ctx->authsize)
+		goto data_size_err;
+
+	areq_ctx->is_single_pass = true; /*defaulted to fast flow*/
+
+	switch (ctx->flow_mode) {
+	case S_DIN_to_AES:
+		if (ctx->cipher_mode == DRV_CIPHER_CBC &&
+		    !IS_ALIGNED(cipherlen, AES_BLOCK_SIZE))
+			goto data_size_err;
+		if (ctx->cipher_mode == DRV_CIPHER_CCM)
+			break;
+		if (ctx->cipher_mode == DRV_CIPHER_GCTR) {
+			if (areq_ctx->plaintext_authenticate_only)
+				areq_ctx->is_single_pass = false;
+			break;
+		}
+
+		if (!IS_ALIGNED(assoclen, sizeof(u32)))
+			areq_ctx->is_single_pass = false;
+
+		if (ctx->cipher_mode == DRV_CIPHER_CTR &&
+		    !IS_ALIGNED(cipherlen, sizeof(u32)))
+			areq_ctx->is_single_pass = false;
+
+		break;
+	case S_DIN_to_DES:
+		if (!IS_ALIGNED(cipherlen, DES_BLOCK_SIZE))
+			goto data_size_err;
+		if (!IS_ALIGNED(assoclen, DES_BLOCK_SIZE))
+			areq_ctx->is_single_pass = false;
+		break;
+	default:
+		dev_err(dev, "Unexpected flow mode (%d)\n", ctx->flow_mode);
+		goto data_size_err;
+	}
+
+	return 0;
+
+data_size_err:
+	return -EINVAL;
+}
+
+static unsigned int format_ccm_a0(u8 *pa0_buff, u32 header_size)
+{
+	unsigned int len = 0;
+
+	if (header_size == 0)
+		return 0;
+
+	if (header_size < ((1UL << 16) - (1UL << 8))) {
+		len = 2;
+
+		pa0_buff[0] = (header_size >> 8) & 0xFF;
+		pa0_buff[1] = header_size & 0xFF;
+	} else {
+		len = 6;
+
+		pa0_buff[0] = 0xFF;
+		pa0_buff[1] = 0xFE;
+		pa0_buff[2] = (header_size >> 24) & 0xFF;
+		pa0_buff[3] = (header_size >> 16) & 0xFF;
+		pa0_buff[4] = (header_size >> 8) & 0xFF;
+		pa0_buff[5] = header_size & 0xFF;
+	}
+
+	return len;
+}
+
+static int set_msg_len(u8 *block, unsigned int msglen, unsigned int csize)
+{
+	__be32 data;
+
+	memset(block, 0, csize);
+	block += csize;
+
+	if (csize >= 4)
+		csize = 4;
+	else if (msglen > (1 << (8 * csize)))
+		return -EOVERFLOW;
+
+	data = cpu_to_be32(msglen);
+	memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+	return 0;
+}
+
+static int cc_ccm(struct aead_request *req, struct cc_hw_desc desc[],
+		  unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int idx = *seq_size;
+	unsigned int cipher_flow_mode;
+	dma_addr_t mac_result;
+
+	if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		cipher_flow_mode = AES_to_HASH_and_DOUT;
+		mac_result = req_ctx->mac_buf_dma_addr;
+	} else { /* Encrypt */
+		cipher_flow_mode = AES_and_HASH;
+		mac_result = req_ctx->icv_dma_addr;
+	}
+
+	/* load key */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CTR);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+		     ((ctx->enc_keylen == 24) ?  CC_AES_KEY_SIZE_MAX :
+		      ctx->enc_keylen), NS_BIT);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* load ctr state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CTR);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     req_ctx->gen_ctx.iv_dma_addr, AES_BLOCK_SIZE, NS_BIT);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* load MAC key */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CBC_MAC);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+		     ((ctx->enc_keylen == 24) ?  CC_AES_KEY_SIZE_MAX :
+		      ctx->enc_keylen), NS_BIT);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* load MAC state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CBC_MAC);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->mac_buf_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* process assoc data */
+	if (req->assoclen > 0) {
+		cc_set_assoc_desc(req, DIN_HASH, desc, &idx);
+	} else {
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     sg_dma_address(&req_ctx->ccm_adata_sg),
+			     AES_BLOCK_SIZE + req_ctx->ccm_hdr_size, NS_BIT);
+		set_flow_mode(&desc[idx], DIN_HASH);
+		idx++;
+	}
+
+	/* process the cipher */
+	if (req_ctx->cryptlen)
+		cc_proc_cipher_desc(req, cipher_flow_mode, desc, &idx);
+
+	/* Read temporal MAC */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CBC_MAC);
+	set_dout_dlli(&desc[idx], req_ctx->mac_buf_dma_addr, ctx->authsize,
+		      NS_BIT, 0);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_config0(&desc[idx], HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_aes_not_hash_mode(&desc[idx]);
+	idx++;
+
+	/* load AES-CTR state (for last MAC calculation)*/
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CTR);
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->ccm_iv0_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	hw_desc_init(&desc[idx]);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	idx++;
+
+	/* encrypt the "T" value and store MAC in mac_state */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->mac_buf_dma_addr,
+		     ctx->authsize, NS_BIT);
+	set_dout_dlli(&desc[idx], mac_result, ctx->authsize, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	idx++;
+
+	*seq_size = idx;
+	return 0;
+}
+
+static int config_ccm_adata(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	//unsigned int size_of_a = 0, rem_a_size = 0;
+	unsigned int lp = req->iv[0];
+	/* Note: The code assume that req->iv[0] already contains the value
+	 * of L' of RFC3610
+	 */
+	unsigned int l = lp + 1;  /* This is L' of RFC 3610. */
+	unsigned int m = ctx->authsize;  /* This is M' of RFC 3610. */
+	u8 *b0 = req_ctx->ccm_config + CCM_B0_OFFSET;
+	u8 *a0 = req_ctx->ccm_config + CCM_A0_OFFSET;
+	u8 *ctr_count_0 = req_ctx->ccm_config + CCM_CTR_COUNT_0_OFFSET;
+	unsigned int cryptlen = (req_ctx->gen_ctx.op_type ==
+				 DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+				req->cryptlen :
+				(req->cryptlen - ctx->authsize);
+	int rc;
+
+	memset(req_ctx->mac_buf, 0, AES_BLOCK_SIZE);
+	memset(req_ctx->ccm_config, 0, AES_BLOCK_SIZE * 3);
+
+	/* taken from crypto/ccm.c */
+	/* 2 <= L <= 8, so 1 <= L' <= 7. */
+	if (l < 2 || l > 8) {
+		dev_err(dev, "illegal iv value %X\n", req->iv[0]);
+		return -EINVAL;
+	}
+	memcpy(b0, req->iv, AES_BLOCK_SIZE);
+
+	/* format control info per RFC 3610 and
+	 * NIST Special Publication 800-38C
+	 */
+	*b0 |= (8 * ((m - 2) / 2));
+	if (req->assoclen > 0)
+		*b0 |= 64;  /* Enable bit 6 if Adata exists. */
+
+	rc = set_msg_len(b0 + 16 - l, cryptlen, l);  /* Write L'. */
+	if (rc) {
+		dev_err(dev, "message len overflow detected");
+		return rc;
+	}
+	 /* END of "taken from crypto/ccm.c" */
+
+	/* l(a) - size of associated data. */
+	req_ctx->ccm_hdr_size = format_ccm_a0(a0, req->assoclen);
+
+	memset(req->iv + 15 - req->iv[0], 0, req->iv[0] + 1);
+	req->iv[15] = 1;
+
+	memcpy(ctr_count_0, req->iv, AES_BLOCK_SIZE);
+	ctr_count_0[15] = 0;
+
+	return 0;
+}
+
+static void cc_proc_rfc4309_ccm(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+
+	/* L' */
+	memset(areq_ctx->ctr_iv, 0, AES_BLOCK_SIZE);
+	/* For RFC 4309, always use 4 bytes for message length
+	 * (at most 2^32-1 bytes).
+	 */
+	areq_ctx->ctr_iv[0] = 3;
+
+	/* In RFC 4309 there is an 11-bytes nonce+IV part,
+	 * that we build here.
+	 */
+	memcpy(areq_ctx->ctr_iv + CCM_BLOCK_NONCE_OFFSET, ctx->ctr_nonce,
+	       CCM_BLOCK_NONCE_SIZE);
+	memcpy(areq_ctx->ctr_iv + CCM_BLOCK_IV_OFFSET, req->iv,
+	       CCM_BLOCK_IV_SIZE);
+	req->iv = areq_ctx->ctr_iv;
+	req->assoclen -= CCM_BLOCK_IV_SIZE;
+}
+
+static void cc_set_ghash_desc(struct aead_request *req,
+			      struct cc_hw_desc desc[], unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int idx = *seq_size;
+
+	/* load key to AES*/
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_ECB);
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+		     ctx->enc_keylen, NS_BIT);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* process one zero block to generate hkey */
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0x0, AES_BLOCK_SIZE);
+	set_dout_dlli(&desc[idx], req_ctx->hkey_dma_addr, AES_BLOCK_SIZE,
+		      NS_BIT, 0);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	idx++;
+
+	/* Memory Barrier */
+	hw_desc_init(&desc[idx]);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	idx++;
+
+	/* Load GHASH subkey */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->hkey_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_HASH_HW_GHASH);
+	set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	/* Configure Hash Engine to work with GHASH.
+	 * Since it was not possible to extend HASH submodes to add GHASH,
+	 * The following command is necessary in order to
+	 * select GHASH (according to HW designers)
+	 */
+	hw_desc_init(&desc[idx]);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_HASH_HW_GHASH);
+	set_cipher_do(&desc[idx], 1); //1=AES_SK RKEK
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	/* Load GHASH initial STATE (which is 0). (for any hash there is an
+	 * initial state)
+	 */
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0x0, AES_BLOCK_SIZE);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_aes_not_hash_mode(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_HASH_HW_GHASH);
+	set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static void cc_set_gctr_desc(struct aead_request *req, struct cc_hw_desc desc[],
+			     unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int idx = *seq_size;
+
+	/* load key to AES*/
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_GCTR);
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->enckey_dma_addr,
+		     ctx->enc_keylen, NS_BIT);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	if (req_ctx->cryptlen && !req_ctx->plaintext_authenticate_only) {
+		/* load AES/CTR initial CTR value inc by 2*/
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], DRV_CIPHER_GCTR);
+		set_key_size_aes(&desc[idx], ctx->enc_keylen);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     req_ctx->gcm_iv_inc2_dma_addr, AES_BLOCK_SIZE,
+			     NS_BIT);
+		set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+		set_flow_mode(&desc[idx], S_DIN_to_AES);
+		idx++;
+	}
+
+	*seq_size = idx;
+}
+
+static void cc_proc_gcm_result(struct aead_request *req,
+			       struct cc_hw_desc desc[],
+			       unsigned int *seq_size)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	dma_addr_t mac_result;
+	unsigned int idx = *seq_size;
+
+	if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		mac_result = req_ctx->mac_buf_dma_addr;
+	} else { /* Encrypt */
+		mac_result = req_ctx->icv_dma_addr;
+	}
+
+	/* process(ghash) gcm_block_len */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->gcm_block_len_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_flow_mode(&desc[idx], DIN_HASH);
+	idx++;
+
+	/* Store GHASH state after GHASH(Associated Data + Cipher +LenBlock) */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_HASH_HW_GHASH);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_dlli(&desc[idx], req_ctx->mac_buf_dma_addr, AES_BLOCK_SIZE,
+		      NS_BIT, 0);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_aes_not_hash_mode(&desc[idx]);
+
+	idx++;
+
+	/* load AES/CTR initial CTR value inc by 1*/
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_GCTR);
+	set_key_size_aes(&desc[idx], ctx->enc_keylen);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->gcm_iv_inc1_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* Memory Barrier */
+	hw_desc_init(&desc[idx]);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	idx++;
+
+	/* process GCTR on stored GHASH and store MAC in mac_state*/
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_GCTR);
+	set_din_type(&desc[idx], DMA_DLLI, req_ctx->mac_buf_dma_addr,
+		     AES_BLOCK_SIZE, NS_BIT);
+	set_dout_dlli(&desc[idx], mac_result, ctx->authsize, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	idx++;
+
+	*seq_size = idx;
+}
+
+static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[],
+		  unsigned int *seq_size)
+{
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	unsigned int cipher_flow_mode;
+
+	if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		cipher_flow_mode = AES_and_HASH;
+	} else { /* Encrypt */
+		cipher_flow_mode = AES_to_HASH_and_DOUT;
+	}
+
+	//in RFC4543 no data to encrypt. just copy data from src to dest.
+	if (req_ctx->plaintext_authenticate_only) {
+		cc_proc_cipher_desc(req, BYPASS, desc, seq_size);
+		cc_set_ghash_desc(req, desc, seq_size);
+		/* process(ghash) assoc data */
+		cc_set_assoc_desc(req, DIN_HASH, desc, seq_size);
+		cc_set_gctr_desc(req, desc, seq_size);
+		cc_proc_gcm_result(req, desc, seq_size);
+		return 0;
+	}
+
+	// for gcm and rfc4106.
+	cc_set_ghash_desc(req, desc, seq_size);
+	/* process(ghash) assoc data */
+	if (req->assoclen > 0)
+		cc_set_assoc_desc(req, DIN_HASH, desc, seq_size);
+	cc_set_gctr_desc(req, desc, seq_size);
+	/* process(gctr+ghash) */
+	if (req_ctx->cryptlen)
+		cc_proc_cipher_desc(req, cipher_flow_mode, desc, seq_size);
+	cc_proc_gcm_result(req, desc, seq_size);
+
+	return 0;
+}
+
+static int config_gcm_context(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	unsigned int cryptlen = (req_ctx->gen_ctx.op_type ==
+				 DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+				req->cryptlen :
+				(req->cryptlen - ctx->authsize);
+	__be32 counter = cpu_to_be32(2);
+
+	dev_dbg(dev, "%s() cryptlen = %d, req->assoclen = %d ctx->authsize = %d\n",
+		__func__, cryptlen, req->assoclen, ctx->authsize);
+
+	memset(req_ctx->hkey, 0, AES_BLOCK_SIZE);
+
+	memset(req_ctx->mac_buf, 0, AES_BLOCK_SIZE);
+
+	memcpy(req->iv + 12, &counter, 4);
+	memcpy(req_ctx->gcm_iv_inc2, req->iv, 16);
+
+	counter = cpu_to_be32(1);
+	memcpy(req->iv + 12, &counter, 4);
+	memcpy(req_ctx->gcm_iv_inc1, req->iv, 16);
+
+	if (!req_ctx->plaintext_authenticate_only) {
+		__be64 temp64;
+
+		temp64 = cpu_to_be64(req->assoclen * 8);
+		memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64));
+		temp64 = cpu_to_be64(cryptlen * 8);
+		memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8);
+	} else {
+		/* rfc4543=>  all data(AAD,IV,Plain) are considered additional
+		 * data that is nothing is encrypted.
+		 */
+		__be64 temp64;
+
+		temp64 = cpu_to_be64((req->assoclen + GCM_BLOCK_RFC4_IV_SIZE +
+				      cryptlen) * 8);
+		memcpy(&req_ctx->gcm_len_block.len_a, &temp64, sizeof(temp64));
+		temp64 = 0;
+		memcpy(&req_ctx->gcm_len_block.len_c, &temp64, 8);
+	}
+
+	return 0;
+}
+
+static void cc_proc_rfc4_gcm(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+
+	memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_NONCE_OFFSET,
+	       ctx->ctr_nonce, GCM_BLOCK_RFC4_NONCE_SIZE);
+	memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_IV_OFFSET, req->iv,
+	       GCM_BLOCK_RFC4_IV_SIZE);
+	req->iv = areq_ctx->ctr_iv;
+	req->assoclen -= GCM_BLOCK_RFC4_IV_SIZE;
+}
+
+static int cc_proc_aead(struct aead_request *req,
+			enum drv_crypto_direction direct)
+{
+	int rc = 0;
+	int seq_len = 0;
+	struct cc_hw_desc desc[MAX_AEAD_PROCESS_SEQ];
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct cc_crypto_req cc_req = {};
+
+	dev_dbg(dev, "%s context=%p req=%p iv=%p src=%p src_ofs=%d dst=%p dst_ofs=%d cryptolen=%d\n",
+		((direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ? "Enc" : "Dec"),
+		ctx, req, req->iv, sg_virt(req->src), req->src->offset,
+		sg_virt(req->dst), req->dst->offset, req->cryptlen);
+
+	/* STAT_PHASE_0: Init and sanity checks */
+
+	/* Check data length according to mode */
+	if (validate_data_size(ctx, direct, req)) {
+		dev_err(dev, "Unsupported crypt/assoc len %d/%d.\n",
+			req->cryptlen, req->assoclen);
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_BLOCK_LEN);
+		return -EINVAL;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_aead_complete;
+	cc_req.user_arg = (void *)req;
+
+	/* Setup request context */
+	areq_ctx->gen_ctx.op_type = direct;
+	areq_ctx->req_authsize = ctx->authsize;
+	areq_ctx->cipher_mode = ctx->cipher_mode;
+
+	/* STAT_PHASE_1: Map buffers */
+
+	if (ctx->cipher_mode == DRV_CIPHER_CTR) {
+		/* Build CTR IV - Copy nonce from last 4 bytes in
+		 * CTR key to first 4 bytes in CTR IV
+		 */
+		memcpy(areq_ctx->ctr_iv, ctx->ctr_nonce,
+		       CTR_RFC3686_NONCE_SIZE);
+		if (!areq_ctx->backup_giv) /*User none-generated IV*/
+			memcpy(areq_ctx->ctr_iv + CTR_RFC3686_NONCE_SIZE,
+			       req->iv, CTR_RFC3686_IV_SIZE);
+		/* Initialize counter portion of counter block */
+		*(__be32 *)(areq_ctx->ctr_iv + CTR_RFC3686_NONCE_SIZE +
+			    CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
+
+		/* Replace with counter iv */
+		req->iv = areq_ctx->ctr_iv;
+		areq_ctx->hw_iv_size = CTR_RFC3686_BLOCK_SIZE;
+	} else if ((ctx->cipher_mode == DRV_CIPHER_CCM) ||
+		   (ctx->cipher_mode == DRV_CIPHER_GCTR)) {
+		areq_ctx->hw_iv_size = AES_BLOCK_SIZE;
+		if (areq_ctx->ctr_iv != req->iv) {
+			memcpy(areq_ctx->ctr_iv, req->iv,
+			       crypto_aead_ivsize(tfm));
+			req->iv = areq_ctx->ctr_iv;
+		}
+	}  else {
+		areq_ctx->hw_iv_size = crypto_aead_ivsize(tfm);
+	}
+
+	if (ctx->cipher_mode == DRV_CIPHER_CCM) {
+		rc = config_ccm_adata(req);
+		if (rc) {
+			dev_dbg(dev, "config_ccm_adata() returned with a failure %d!",
+				rc);
+			goto exit;
+		}
+	} else {
+		areq_ctx->ccm_hdr_size = ccm_header_size_null;
+	}
+
+	if (ctx->cipher_mode == DRV_CIPHER_GCTR) {
+		rc = config_gcm_context(req);
+		if (rc) {
+			dev_dbg(dev, "config_gcm_context() returned with a failure %d!",
+				rc);
+			goto exit;
+		}
+	}
+
+	rc = cc_map_aead_request(ctx->drvdata, req);
+	if (rc) {
+		dev_err(dev, "map_request() failed\n");
+		goto exit;
+	}
+
+	/* do we need to generate IV? */
+	if (areq_ctx->backup_giv) {
+		/* set the DMA mapped IV address*/
+		if (ctx->cipher_mode == DRV_CIPHER_CTR) {
+			cc_req.ivgen_dma_addr[0] =
+				areq_ctx->gen_ctx.iv_dma_addr +
+				CTR_RFC3686_NONCE_SIZE;
+			cc_req.ivgen_dma_addr_len = 1;
+		} else if (ctx->cipher_mode == DRV_CIPHER_CCM) {
+			/* In ccm, the IV needs to exist both inside B0 and
+			 * inside the counter.It is also copied to iv_dma_addr
+			 * for other reasons (like returning it to the user).
+			 * So, using 3 (identical) IV outputs.
+			 */
+			cc_req.ivgen_dma_addr[0] =
+				areq_ctx->gen_ctx.iv_dma_addr +
+				CCM_BLOCK_IV_OFFSET;
+			cc_req.ivgen_dma_addr[1] =
+				sg_dma_address(&areq_ctx->ccm_adata_sg) +
+				CCM_B0_OFFSET + CCM_BLOCK_IV_OFFSET;
+			cc_req.ivgen_dma_addr[2] =
+				sg_dma_address(&areq_ctx->ccm_adata_sg) +
+				CCM_CTR_COUNT_0_OFFSET + CCM_BLOCK_IV_OFFSET;
+			cc_req.ivgen_dma_addr_len = 3;
+		} else {
+			cc_req.ivgen_dma_addr[0] =
+				areq_ctx->gen_ctx.iv_dma_addr;
+			cc_req.ivgen_dma_addr_len = 1;
+		}
+
+		/* set the IV size (8/16 B long)*/
+		cc_req.ivgen_size = crypto_aead_ivsize(tfm);
+	}
+
+	/* STAT_PHASE_2: Create sequence */
+
+	/* Load MLLI tables to SRAM if necessary */
+	cc_mlli_to_sram(req, desc, &seq_len);
+
+	/*TODO: move seq len by reference */
+	switch (ctx->auth_mode) {
+	case DRV_HASH_SHA1:
+	case DRV_HASH_SHA256:
+		cc_hmac_authenc(req, desc, &seq_len);
+		break;
+	case DRV_HASH_XCBC_MAC:
+		cc_xcbc_authenc(req, desc, &seq_len);
+		break;
+	case DRV_HASH_NULL:
+		if (ctx->cipher_mode == DRV_CIPHER_CCM)
+			cc_ccm(req, desc, &seq_len);
+		if (ctx->cipher_mode == DRV_CIPHER_GCTR)
+			cc_gcm(req, desc, &seq_len);
+		break;
+	default:
+		dev_err(dev, "Unsupported authenc (%d)\n", ctx->auth_mode);
+		cc_unmap_aead_request(dev, req);
+		rc = -ENOTSUPP;
+		goto exit;
+	}
+
+	/* STAT_PHASE_3: Lock HW and push sequence */
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, seq_len, &req->base);
+
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_aead_request(dev, req);
+	}
+
+exit:
+	return rc;
+}
+
+static int cc_aead_encrypt(struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc;
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+	areq_ctx->is_gcm4543 = false;
+
+	areq_ctx->plaintext_authenticate_only = false;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+
+	return rc;
+}
+
+static int cc_rfc4309_ccm_encrypt(struct aead_request *req)
+{
+	/* Very similar to cc_aead_encrypt() above. */
+
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+	areq_ctx->is_gcm4543 = true;
+
+	cc_proc_rfc4309_ccm(req);
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+out:
+	return rc;
+}
+
+static int cc_aead_decrypt(struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc;
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+	areq_ctx->is_gcm4543 = false;
+
+	areq_ctx->plaintext_authenticate_only = false;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+
+	return rc;
+}
+
+static int cc_rfc4309_ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+
+	areq_ctx->is_gcm4543 = true;
+	cc_proc_rfc4309_ccm(req);
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+
+out:
+	return rc;
+}
+
+static int cc_rfc4106_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "%s()  keylen %d, key %p\n", __func__, keylen, key);
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	keylen -= 4;
+	memcpy(ctx->ctr_nonce, key + keylen, 4);
+
+	return cc_aead_setkey(tfm, key, keylen);
+}
+
+static int cc_rfc4543_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "%s()  keylen %d, key %p\n", __func__, keylen, key);
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	keylen -= 4;
+	memcpy(ctx->ctr_nonce, key + keylen, 4);
+
+	return cc_aead_setkey(tfm, key, keylen);
+}
+
+static int cc_gcm_setauthsize(struct crypto_aead *authenc,
+			      unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return cc_aead_setauthsize(authenc, authsize);
+}
+
+static int cc_rfc4106_gcm_setauthsize(struct crypto_aead *authenc,
+				      unsigned int authsize)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(authenc);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "authsize %d\n", authsize);
+
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return cc_aead_setauthsize(authenc, authsize);
+}
+
+static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc,
+				      unsigned int authsize)
+{
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(authenc);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "authsize %d\n", authsize);
+
+	if (authsize != 16)
+		return -EINVAL;
+
+	return cc_aead_setauthsize(authenc, authsize);
+}
+
+static int cc_rfc4106_gcm_encrypt(struct aead_request *req)
+{
+	/* Very similar to cc_aead_encrypt() above. */
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+
+	areq_ctx->plaintext_authenticate_only = false;
+
+	cc_proc_rfc4_gcm(req);
+	areq_ctx->is_gcm4543 = true;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+out:
+	return rc;
+}
+
+static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
+{
+	/* Very similar to cc_aead_encrypt() above. */
+
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc;
+
+	//plaintext is not encryped with rfc4543
+	areq_ctx->plaintext_authenticate_only = true;
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+
+	cc_proc_rfc4_gcm(req);
+	areq_ctx->is_gcm4543 = true;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+
+	return rc;
+}
+
+static int cc_rfc4106_gcm_decrypt(struct aead_request *req)
+{
+	/* Very similar to cc_aead_decrypt() above. */
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+
+	areq_ctx->plaintext_authenticate_only = false;
+
+	cc_proc_rfc4_gcm(req);
+	areq_ctx->is_gcm4543 = true;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+out:
+	return rc;
+}
+
+static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
+{
+	/* Very similar to cc_aead_decrypt() above. */
+
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc;
+
+	//plaintext is not decryped with rfc4543
+	areq_ctx->plaintext_authenticate_only = true;
+
+	/* No generated IV required */
+	areq_ctx->backup_iv = req->iv;
+	areq_ctx->backup_giv = NULL;
+
+	cc_proc_rfc4_gcm(req);
+	areq_ctx->is_gcm4543 = true;
+
+	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
+	if (rc != -EINPROGRESS && rc != -EBUSY)
+		req->iv = areq_ctx->backup_iv;
+
+	return rc;
+}
+
+/* aead alg */
+static struct cc_alg_template aead_algs[] = {
+	{
+		.name = "authenc(hmac(sha1),cbc(aes))",
+		.driver_name = "authenc-hmac-sha1-cbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_SHA1,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(hmac(sha1),cbc(des3_ede))",
+		.driver_name = "authenc-hmac-sha1-cbc-des3-ccree",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_DES,
+		.auth_mode = DRV_HASH_SHA1,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(hmac(sha256),cbc(aes))",
+		.driver_name = "authenc-hmac-sha256-cbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_SHA256,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(hmac(sha256),cbc(des3_ede))",
+		.driver_name = "authenc-hmac-sha256-cbc-des3-ccree",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_DES,
+		.auth_mode = DRV_HASH_SHA256,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(xcbc(aes),cbc(aes))",
+		.driver_name = "authenc-xcbc-aes-cbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_XCBC_MAC,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(hmac(sha1),rfc3686(ctr(aes)))",
+		.driver_name = "authenc-hmac-sha1-rfc3686-ctr-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA1_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_SHA1,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(hmac(sha256),rfc3686(ctr(aes)))",
+		.driver_name = "authenc-hmac-sha256-rfc3686-ctr-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = SHA256_DIGEST_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_SHA256,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "authenc(xcbc(aes),rfc3686(ctr(aes)))",
+		.driver_name = "authenc-xcbc-aes-rfc3686-ctr-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_aead_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = CTR_RFC3686_IV_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_XCBC_MAC,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "ccm(aes)",
+		.driver_name = "ccm-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_ccm_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = AES_BLOCK_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CCM,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_NULL,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "rfc4309(ccm(aes))",
+		.driver_name = "rfc4309-ccm-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_rfc4309_ccm_setkey,
+			.setauthsize = cc_rfc4309_ccm_setauthsize,
+			.encrypt = cc_rfc4309_ccm_encrypt,
+			.decrypt = cc_rfc4309_ccm_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = CCM_BLOCK_IV_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CCM,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_NULL,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "gcm(aes)",
+		.driver_name = "gcm-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_aead_setkey,
+			.setauthsize = cc_gcm_setauthsize,
+			.encrypt = cc_aead_encrypt,
+			.decrypt = cc_aead_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_GCTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_NULL,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "rfc4106(gcm(aes))",
+		.driver_name = "rfc4106-gcm-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_rfc4106_gcm_setkey,
+			.setauthsize = cc_rfc4106_gcm_setauthsize,
+			.encrypt = cc_rfc4106_gcm_encrypt,
+			.decrypt = cc_rfc4106_gcm_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = GCM_BLOCK_RFC4_IV_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_GCTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_NULL,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "rfc4543(gcm(aes))",
+		.driver_name = "rfc4543-gcm-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_AEAD,
+		.template_aead = {
+			.setkey = cc_rfc4543_gcm_setkey,
+			.setauthsize = cc_rfc4543_gcm_setauthsize,
+			.encrypt = cc_rfc4543_gcm_encrypt,
+			.decrypt = cc_rfc4543_gcm_decrypt,
+			.init = cc_aead_init,
+			.exit = cc_aead_exit,
+			.ivsize = GCM_BLOCK_RFC4_IV_SIZE,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_GCTR,
+		.flow_mode = S_DIN_to_AES,
+		.auth_mode = DRV_HASH_NULL,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+};
+
+static struct cc_crypto_alg *cc_create_aead_alg(struct cc_alg_template *tmpl,
+						struct device *dev)
+{
+	struct cc_crypto_alg *t_alg;
+	struct aead_alg *alg;
+
+	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
+	if (!t_alg)
+		return ERR_PTR(-ENOMEM);
+
+	alg = &tmpl->template_aead;
+
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 tmpl->driver_name);
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CC_CRA_PRIO;
+
+	alg->base.cra_ctxsize = sizeof(struct cc_aead_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY |
+			 tmpl->type;
+	alg->init = cc_aead_init;
+	alg->exit = cc_aead_exit;
+
+	t_alg->aead_alg = *alg;
+
+	t_alg->cipher_mode = tmpl->cipher_mode;
+	t_alg->flow_mode = tmpl->flow_mode;
+	t_alg->auth_mode = tmpl->auth_mode;
+
+	return t_alg;
+}
+
+int cc_aead_free(struct cc_drvdata *drvdata)
+{
+	struct cc_crypto_alg *t_alg, *n;
+	struct cc_aead_handle *aead_handle =
+		(struct cc_aead_handle *)drvdata->aead_handle;
+
+	if (aead_handle) {
+		/* Remove registered algs */
+		list_for_each_entry_safe(t_alg, n, &aead_handle->aead_list,
+					 entry) {
+			crypto_unregister_aead(&t_alg->aead_alg);
+			list_del(&t_alg->entry);
+			kfree(t_alg);
+		}
+		kfree(aead_handle);
+		drvdata->aead_handle = NULL;
+	}
+
+	return 0;
+}
+
+int cc_aead_alloc(struct cc_drvdata *drvdata)
+{
+	struct cc_aead_handle *aead_handle;
+	struct cc_crypto_alg *t_alg;
+	int rc = -ENOMEM;
+	int alg;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	aead_handle = kmalloc(sizeof(*aead_handle), GFP_KERNEL);
+	if (!aead_handle) {
+		rc = -ENOMEM;
+		goto fail0;
+	}
+
+	INIT_LIST_HEAD(&aead_handle->aead_list);
+	drvdata->aead_handle = aead_handle;
+
+	aead_handle->sram_workspace_addr = cc_sram_alloc(drvdata,
+							 MAX_HMAC_DIGEST_SIZE);
+
+	if (aead_handle->sram_workspace_addr == NULL_SRAM_ADDR) {
+		dev_err(dev, "SRAM pool exhausted\n");
+		rc = -ENOMEM;
+		goto fail1;
+	}
+
+	/* Linux crypto */
+	for (alg = 0; alg < ARRAY_SIZE(aead_algs); alg++) {
+		if (aead_algs[alg].min_hw_rev > drvdata->hw_rev)
+			continue;
+
+		t_alg = cc_create_aead_alg(&aead_algs[alg], dev);
+		if (IS_ERR(t_alg)) {
+			rc = PTR_ERR(t_alg);
+			dev_err(dev, "%s alg allocation failed\n",
+				aead_algs[alg].driver_name);
+			goto fail1;
+		}
+		t_alg->drvdata = drvdata;
+		rc = crypto_register_aead(&t_alg->aead_alg);
+		if (rc) {
+			dev_err(dev, "%s alg registration failed\n",
+				t_alg->aead_alg.base.cra_driver_name);
+			goto fail2;
+		} else {
+			list_add_tail(&t_alg->entry, &aead_handle->aead_list);
+			dev_dbg(dev, "Registered %s\n",
+				t_alg->aead_alg.base.cra_driver_name);
+		}
+	}
+
+	return 0;
+
+fail2:
+	kfree(t_alg);
+fail1:
+	cc_aead_free(drvdata);
+fail0:
+	return rc;
+}
diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h
new file mode 100644
index 000000000000..5edf3b351fa4
--- /dev/null
+++ b/drivers/crypto/ccree/cc_aead.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_aead.h
+ * ARM CryptoCell AEAD Crypto API
+ */
+
+#ifndef __CC_AEAD_H__
+#define __CC_AEAD_H__
+
+#include <linux/kernel.h>
+#include <crypto/algapi.h>
+#include <crypto/ctr.h>
+
+/* mac_cmp - HW writes 8 B but all bytes hold the same value */
+#define ICV_CMP_SIZE 8
+#define CCM_CONFIG_BUF_SIZE (AES_BLOCK_SIZE * 3)
+#define MAX_MAC_SIZE SHA256_DIGEST_SIZE
+
+/* defines for AES GCM configuration buffer */
+#define GCM_BLOCK_LEN_SIZE 8
+
+#define GCM_BLOCK_RFC4_IV_OFFSET	4
+#define GCM_BLOCK_RFC4_IV_SIZE		8  /* IV size for rfc's */
+#define GCM_BLOCK_RFC4_NONCE_OFFSET	0
+#define GCM_BLOCK_RFC4_NONCE_SIZE	4
+
+/* Offsets into AES CCM configuration buffer */
+#define CCM_B0_OFFSET 0
+#define CCM_A0_OFFSET 16
+#define CCM_CTR_COUNT_0_OFFSET 32
+/* CCM B0 and CTR_COUNT constants. */
+#define CCM_BLOCK_NONCE_OFFSET 1  /* Nonce offset inside B0 and CTR_COUNT */
+#define CCM_BLOCK_NONCE_SIZE   3  /* Nonce size inside B0 and CTR_COUNT */
+#define CCM_BLOCK_IV_OFFSET    4  /* IV offset inside B0 and CTR_COUNT */
+#define CCM_BLOCK_IV_SIZE      8  /* IV size inside B0 and CTR_COUNT */
+
+enum aead_ccm_header_size {
+	ccm_header_size_null = -1,
+	ccm_header_size_zero = 0,
+	ccm_header_size_2 = 2,
+	ccm_header_size_6 = 6,
+	ccm_header_size_max = S32_MAX
+};
+
+struct aead_req_ctx {
+	/* Allocate cache line although only 4 bytes are needed to
+	 *  assure next field falls @ cache line
+	 *  Used for both: digest HW compare and CCM/GCM MAC value
+	 */
+	u8 mac_buf[MAX_MAC_SIZE] ____cacheline_aligned;
+	u8 ctr_iv[AES_BLOCK_SIZE] ____cacheline_aligned;
+
+	//used in gcm
+	u8 gcm_iv_inc1[AES_BLOCK_SIZE] ____cacheline_aligned;
+	u8 gcm_iv_inc2[AES_BLOCK_SIZE] ____cacheline_aligned;
+	u8 hkey[AES_BLOCK_SIZE] ____cacheline_aligned;
+	struct {
+		u8 len_a[GCM_BLOCK_LEN_SIZE] ____cacheline_aligned;
+		u8 len_c[GCM_BLOCK_LEN_SIZE];
+	} gcm_len_block;
+
+	u8 ccm_config[CCM_CONFIG_BUF_SIZE] ____cacheline_aligned;
+	/* HW actual size input */
+	unsigned int hw_iv_size ____cacheline_aligned;
+	/* used to prevent cache coherence problem */
+	u8 backup_mac[MAX_MAC_SIZE];
+	u8 *backup_iv; /*store iv for generated IV flow*/
+	u8 *backup_giv; /*store iv for rfc3686(ctr) flow*/
+	dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */
+	/* buffer for internal ccm configurations */
+	dma_addr_t ccm_iv0_dma_addr;
+	dma_addr_t icv_dma_addr; /* Phys. address of ICV */
+
+	//used in gcm
+	/* buffer for internal gcm configurations */
+	dma_addr_t gcm_iv_inc1_dma_addr;
+	/* buffer for internal gcm configurations */
+	dma_addr_t gcm_iv_inc2_dma_addr;
+	dma_addr_t hkey_dma_addr; /* Phys. address of hkey */
+	dma_addr_t gcm_block_len_dma_addr; /* Phys. address of gcm block len */
+	bool is_gcm4543;
+
+	u8 *icv_virt_addr; /* Virt. address of ICV */
+	struct async_gen_req_ctx gen_ctx;
+	struct cc_mlli assoc;
+	struct cc_mlli src;
+	struct cc_mlli dst;
+	struct scatterlist *src_sgl;
+	struct scatterlist *dst_sgl;
+	unsigned int src_offset;
+	unsigned int dst_offset;
+	enum cc_req_dma_buf_type assoc_buff_type;
+	enum cc_req_dma_buf_type data_buff_type;
+	struct mlli_params mlli_params;
+	unsigned int cryptlen;
+	struct scatterlist ccm_adata_sg;
+	enum aead_ccm_header_size ccm_hdr_size;
+	unsigned int req_authsize;
+	enum drv_cipher_mode cipher_mode;
+	bool is_icv_fragmented;
+	bool is_single_pass;
+	bool plaintext_authenticate_only; //for gcm_rfc4543
+};
+
+int cc_aead_alloc(struct cc_drvdata *drvdata);
+int cc_aead_free(struct cc_drvdata *drvdata);
+
+#endif /*__CC_AEAD_H__*/
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
new file mode 100644
index 000000000000..b32577477b4c
--- /dev/null
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -0,0 +1,1651 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <crypto/internal/aead.h>
+#include <crypto/authenc.h>
+#include <crypto/scatterwalk.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+
+#include "cc_buffer_mgr.h"
+#include "cc_lli_defs.h"
+#include "cc_cipher.h"
+#include "cc_hash.h"
+#include "cc_aead.h"
+
+enum dma_buffer_type {
+	DMA_NULL_TYPE = -1,
+	DMA_SGL_TYPE = 1,
+	DMA_BUFF_TYPE = 2,
+};
+
+struct buff_mgr_handle {
+	struct dma_pool *mlli_buffs_pool;
+};
+
+union buffer_array_entry {
+	struct scatterlist *sgl;
+	dma_addr_t buffer_dma;
+};
+
+struct buffer_array {
+	unsigned int num_of_buffers;
+	union buffer_array_entry entry[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	unsigned int offset[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	int nents[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	int total_data_len[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	enum dma_buffer_type type[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	bool is_last[MAX_NUM_OF_BUFFERS_IN_MLLI];
+	u32 *mlli_nents[MAX_NUM_OF_BUFFERS_IN_MLLI];
+};
+
+static inline char *cc_dma_buf_type(enum cc_req_dma_buf_type type)
+{
+	switch (type) {
+	case CC_DMA_BUF_NULL:
+		return "BUF_NULL";
+	case CC_DMA_BUF_DLLI:
+		return "BUF_DLLI";
+	case CC_DMA_BUF_MLLI:
+		return "BUF_MLLI";
+	default:
+		return "BUF_INVALID";
+	}
+}
+
+/**
+ * cc_copy_mac() - Copy MAC to temporary location
+ *
+ * @dev: device object
+ * @req: aead request object
+ * @dir: [IN] copy from/to sgl
+ */
+static void cc_copy_mac(struct device *dev, struct aead_request *req,
+			enum cc_sg_cpy_direct dir)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	u32 skip = req->assoclen + req->cryptlen;
+
+	if (areq_ctx->is_gcm4543)
+		skip += crypto_aead_ivsize(tfm);
+
+	cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src,
+			   (skip - areq_ctx->req_authsize), skip, dir);
+}
+
+/**
+ * cc_get_sgl_nents() - Get scatterlist number of entries.
+ *
+ * @sg_list: SG list
+ * @nbytes: [IN] Total SGL data bytes.
+ * @lbytes: [OUT] Returns the amount of bytes at the last entry
+ */
+static unsigned int cc_get_sgl_nents(struct device *dev,
+				     struct scatterlist *sg_list,
+				     unsigned int nbytes, u32 *lbytes,
+				     bool *is_chained)
+{
+	unsigned int nents = 0;
+
+	while (nbytes && sg_list) {
+		if (sg_list->length) {
+			nents++;
+			/* get the number of bytes in the last entry */
+			*lbytes = nbytes;
+			nbytes -= (sg_list->length > nbytes) ?
+					nbytes : sg_list->length;
+			sg_list = sg_next(sg_list);
+		} else {
+			sg_list = (struct scatterlist *)sg_page(sg_list);
+			if (is_chained)
+				*is_chained = true;
+		}
+	}
+	dev_dbg(dev, "nents %d last bytes %d\n", nents, *lbytes);
+	return nents;
+}
+
+/**
+ * cc_zero_sgl() - Zero scatter scatter list data.
+ *
+ * @sgl:
+ */
+void cc_zero_sgl(struct scatterlist *sgl, u32 data_len)
+{
+	struct scatterlist *current_sg = sgl;
+	int sg_index = 0;
+
+	while (sg_index <= data_len) {
+		if (!current_sg) {
+			/* reached the end of the sgl --> just return back */
+			return;
+		}
+		memset(sg_virt(current_sg), 0, current_sg->length);
+		sg_index += current_sg->length;
+		current_sg = sg_next(current_sg);
+	}
+}
+
+/**
+ * cc_copy_sg_portion() - Copy scatter list data,
+ * from to_skip to end, to dest and vice versa
+ *
+ * @dest:
+ * @sg:
+ * @to_skip:
+ * @end:
+ * @direct:
+ */
+void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg,
+			u32 to_skip, u32 end, enum cc_sg_cpy_direct direct)
+{
+	u32 nents, lbytes;
+
+	nents = cc_get_sgl_nents(dev, sg, end, &lbytes, NULL);
+	sg_copy_buffer(sg, nents, (void *)dest, (end - to_skip + 1), to_skip,
+		       (direct == CC_SG_TO_BUF));
+}
+
+static int cc_render_buff_to_mlli(struct device *dev, dma_addr_t buff_dma,
+				  u32 buff_size, u32 *curr_nents,
+				  u32 **mlli_entry_pp)
+{
+	u32 *mlli_entry_p = *mlli_entry_pp;
+	u32 new_nents;
+
+	/* Verify there is no memory overflow*/
+	new_nents = (*curr_nents + buff_size / CC_MAX_MLLI_ENTRY_SIZE + 1);
+	if (new_nents > MAX_NUM_OF_TOTAL_MLLI_ENTRIES)
+		return -ENOMEM;
+
+	/*handle buffer longer than 64 kbytes */
+	while (buff_size > CC_MAX_MLLI_ENTRY_SIZE) {
+		cc_lli_set_addr(mlli_entry_p, buff_dma);
+		cc_lli_set_size(mlli_entry_p, CC_MAX_MLLI_ENTRY_SIZE);
+		dev_dbg(dev, "entry[%d]: single_buff=0x%08X size=%08X\n",
+			*curr_nents, mlli_entry_p[LLI_WORD0_OFFSET],
+			mlli_entry_p[LLI_WORD1_OFFSET]);
+		buff_dma += CC_MAX_MLLI_ENTRY_SIZE;
+		buff_size -= CC_MAX_MLLI_ENTRY_SIZE;
+		mlli_entry_p = mlli_entry_p + 2;
+		(*curr_nents)++;
+	}
+	/*Last entry */
+	cc_lli_set_addr(mlli_entry_p, buff_dma);
+	cc_lli_set_size(mlli_entry_p, buff_size);
+	dev_dbg(dev, "entry[%d]: single_buff=0x%08X size=%08X\n",
+		*curr_nents, mlli_entry_p[LLI_WORD0_OFFSET],
+		mlli_entry_p[LLI_WORD1_OFFSET]);
+	mlli_entry_p = mlli_entry_p + 2;
+	*mlli_entry_pp = mlli_entry_p;
+	(*curr_nents)++;
+	return 0;
+}
+
+static int cc_render_sg_to_mlli(struct device *dev, struct scatterlist *sgl,
+				u32 sgl_data_len, u32 sgl_offset,
+				u32 *curr_nents, u32 **mlli_entry_pp)
+{
+	struct scatterlist *curr_sgl = sgl;
+	u32 *mlli_entry_p = *mlli_entry_pp;
+	s32 rc = 0;
+
+	for ( ; (curr_sgl && sgl_data_len);
+	      curr_sgl = sg_next(curr_sgl)) {
+		u32 entry_data_len =
+			(sgl_data_len > sg_dma_len(curr_sgl) - sgl_offset) ?
+				sg_dma_len(curr_sgl) - sgl_offset :
+				sgl_data_len;
+		sgl_data_len -= entry_data_len;
+		rc = cc_render_buff_to_mlli(dev, sg_dma_address(curr_sgl) +
+					    sgl_offset, entry_data_len,
+					    curr_nents, &mlli_entry_p);
+		if (rc)
+			return rc;
+
+		sgl_offset = 0;
+	}
+	*mlli_entry_pp = mlli_entry_p;
+	return 0;
+}
+
+static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data,
+			    struct mlli_params *mlli_params, gfp_t flags)
+{
+	u32 *mlli_p;
+	u32 total_nents = 0, prev_total_nents = 0;
+	int rc = 0, i;
+
+	dev_dbg(dev, "NUM of SG's = %d\n", sg_data->num_of_buffers);
+
+	/* Allocate memory from the pointed pool */
+	mlli_params->mlli_virt_addr =
+		dma_pool_alloc(mlli_params->curr_pool, flags,
+			       &mlli_params->mlli_dma_addr);
+	if (!mlli_params->mlli_virt_addr) {
+		dev_err(dev, "dma_pool_alloc() failed\n");
+		rc = -ENOMEM;
+		goto build_mlli_exit;
+	}
+	/* Point to start of MLLI */
+	mlli_p = (u32 *)mlli_params->mlli_virt_addr;
+	/* go over all SG's and link it to one MLLI table */
+	for (i = 0; i < sg_data->num_of_buffers; i++) {
+		union buffer_array_entry *entry = &sg_data->entry[i];
+		u32 tot_len = sg_data->total_data_len[i];
+		u32 offset = sg_data->offset[i];
+
+		if (sg_data->type[i] == DMA_SGL_TYPE)
+			rc = cc_render_sg_to_mlli(dev, entry->sgl, tot_len,
+						  offset, &total_nents,
+						  &mlli_p);
+		else /*DMA_BUFF_TYPE*/
+			rc = cc_render_buff_to_mlli(dev, entry->buffer_dma,
+						    tot_len, &total_nents,
+						    &mlli_p);
+		if (rc)
+			return rc;
+
+		/* set last bit in the current table */
+		if (sg_data->mlli_nents[i]) {
+			/*Calculate the current MLLI table length for the
+			 *length field in the descriptor
+			 */
+			*sg_data->mlli_nents[i] +=
+				(total_nents - prev_total_nents);
+			prev_total_nents = total_nents;
+		}
+	}
+
+	/* Set MLLI size for the bypass operation */
+	mlli_params->mlli_len = (total_nents * LLI_ENTRY_BYTE_SIZE);
+
+	dev_dbg(dev, "MLLI params: virt_addr=%pK dma_addr=%pad mlli_len=0x%X\n",
+		mlli_params->mlli_virt_addr, &mlli_params->mlli_dma_addr,
+		mlli_params->mlli_len);
+
+build_mlli_exit:
+	return rc;
+}
+
+static void cc_add_buffer_entry(struct device *dev,
+				struct buffer_array *sgl_data,
+				dma_addr_t buffer_dma, unsigned int buffer_len,
+				bool is_last_entry, u32 *mlli_nents)
+{
+	unsigned int index = sgl_data->num_of_buffers;
+
+	dev_dbg(dev, "index=%u single_buff=%pad buffer_len=0x%08X is_last=%d\n",
+		index, &buffer_dma, buffer_len, is_last_entry);
+	sgl_data->nents[index] = 1;
+	sgl_data->entry[index].buffer_dma = buffer_dma;
+	sgl_data->offset[index] = 0;
+	sgl_data->total_data_len[index] = buffer_len;
+	sgl_data->type[index] = DMA_BUFF_TYPE;
+	sgl_data->is_last[index] = is_last_entry;
+	sgl_data->mlli_nents[index] = mlli_nents;
+	if (sgl_data->mlli_nents[index])
+		*sgl_data->mlli_nents[index] = 0;
+	sgl_data->num_of_buffers++;
+}
+
+static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data,
+			    unsigned int nents, struct scatterlist *sgl,
+			    unsigned int data_len, unsigned int data_offset,
+			    bool is_last_table, u32 *mlli_nents)
+{
+	unsigned int index = sgl_data->num_of_buffers;
+
+	dev_dbg(dev, "index=%u nents=%u sgl=%pK data_len=0x%08X is_last=%d\n",
+		index, nents, sgl, data_len, is_last_table);
+	sgl_data->nents[index] = nents;
+	sgl_data->entry[index].sgl = sgl;
+	sgl_data->offset[index] = data_offset;
+	sgl_data->total_data_len[index] = data_len;
+	sgl_data->type[index] = DMA_SGL_TYPE;
+	sgl_data->is_last[index] = is_last_table;
+	sgl_data->mlli_nents[index] = mlli_nents;
+	if (sgl_data->mlli_nents[index])
+		*sgl_data->mlli_nents[index] = 0;
+	sgl_data->num_of_buffers++;
+}
+
+static int cc_dma_map_sg(struct device *dev, struct scatterlist *sg, u32 nents,
+			 enum dma_data_direction direction)
+{
+	u32 i, j;
+	struct scatterlist *l_sg = sg;
+
+	for (i = 0; i < nents; i++) {
+		if (!l_sg)
+			break;
+		if (dma_map_sg(dev, l_sg, 1, direction) != 1) {
+			dev_err(dev, "dma_map_page() sg buffer failed\n");
+			goto err;
+		}
+		l_sg = sg_next(l_sg);
+	}
+	return nents;
+
+err:
+	/* Restore mapped parts */
+	for (j = 0; j < i; j++) {
+		if (!sg)
+			break;
+		dma_unmap_sg(dev, sg, 1, direction);
+		sg = sg_next(sg);
+	}
+	return 0;
+}
+
+static int cc_map_sg(struct device *dev, struct scatterlist *sg,
+		     unsigned int nbytes, int direction, u32 *nents,
+		     u32 max_sg_nents, u32 *lbytes, u32 *mapped_nents)
+{
+	bool is_chained = false;
+
+	if (sg_is_last(sg)) {
+		/* One entry only case -set to DLLI */
+		if (dma_map_sg(dev, sg, 1, direction) != 1) {
+			dev_err(dev, "dma_map_sg() single buffer failed\n");
+			return -ENOMEM;
+		}
+		dev_dbg(dev, "Mapped sg: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n",
+			&sg_dma_address(sg), sg_page(sg), sg_virt(sg),
+			sg->offset, sg->length);
+		*lbytes = nbytes;
+		*nents = 1;
+		*mapped_nents = 1;
+	} else {  /*sg_is_last*/
+		*nents = cc_get_sgl_nents(dev, sg, nbytes, lbytes,
+					  &is_chained);
+		if (*nents > max_sg_nents) {
+			*nents = 0;
+			dev_err(dev, "Too many fragments. current %d max %d\n",
+				*nents, max_sg_nents);
+			return -ENOMEM;
+		}
+		if (!is_chained) {
+			/* In case of mmu the number of mapped nents might
+			 * be changed from the original sgl nents
+			 */
+			*mapped_nents = dma_map_sg(dev, sg, *nents, direction);
+			if (*mapped_nents == 0) {
+				*nents = 0;
+				dev_err(dev, "dma_map_sg() sg buffer failed\n");
+				return -ENOMEM;
+			}
+		} else {
+			/*In this case the driver maps entry by entry so it
+			 * must have the same nents before and after map
+			 */
+			*mapped_nents = cc_dma_map_sg(dev, sg, *nents,
+						      direction);
+			if (*mapped_nents != *nents) {
+				*nents = *mapped_nents;
+				dev_err(dev, "dma_map_sg() sg buffer failed\n");
+				return -ENOMEM;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int
+cc_set_aead_conf_buf(struct device *dev, struct aead_req_ctx *areq_ctx,
+		     u8 *config_data, struct buffer_array *sg_data,
+		     unsigned int assoclen)
+{
+	dev_dbg(dev, " handle additional data config set to DLLI\n");
+	/* create sg for the current buffer */
+	sg_init_one(&areq_ctx->ccm_adata_sg, config_data,
+		    AES_BLOCK_SIZE + areq_ctx->ccm_hdr_size);
+	if (dma_map_sg(dev, &areq_ctx->ccm_adata_sg, 1, DMA_TO_DEVICE) != 1) {
+		dev_err(dev, "dma_map_sg() config buffer failed\n");
+		return -ENOMEM;
+	}
+	dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n",
+		&sg_dma_address(&areq_ctx->ccm_adata_sg),
+		sg_page(&areq_ctx->ccm_adata_sg),
+		sg_virt(&areq_ctx->ccm_adata_sg),
+		areq_ctx->ccm_adata_sg.offset, areq_ctx->ccm_adata_sg.length);
+	/* prepare for case of MLLI */
+	if (assoclen > 0) {
+		cc_add_sg_entry(dev, sg_data, 1, &areq_ctx->ccm_adata_sg,
+				(AES_BLOCK_SIZE + areq_ctx->ccm_hdr_size),
+				0, false, NULL);
+	}
+	return 0;
+}
+
+static int cc_set_hash_buf(struct device *dev, struct ahash_req_ctx *areq_ctx,
+			   u8 *curr_buff, u32 curr_buff_cnt,
+			   struct buffer_array *sg_data)
+{
+	dev_dbg(dev, " handle curr buff %x set to   DLLI\n", curr_buff_cnt);
+	/* create sg for the current buffer */
+	sg_init_one(areq_ctx->buff_sg, curr_buff, curr_buff_cnt);
+	if (dma_map_sg(dev, areq_ctx->buff_sg, 1, DMA_TO_DEVICE) != 1) {
+		dev_err(dev, "dma_map_sg() src buffer failed\n");
+		return -ENOMEM;
+	}
+	dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n",
+		&sg_dma_address(areq_ctx->buff_sg), sg_page(areq_ctx->buff_sg),
+		sg_virt(areq_ctx->buff_sg), areq_ctx->buff_sg->offset,
+		areq_ctx->buff_sg->length);
+	areq_ctx->data_dma_buf_type = CC_DMA_BUF_DLLI;
+	areq_ctx->curr_sg = areq_ctx->buff_sg;
+	areq_ctx->in_nents = 0;
+	/* prepare for case of MLLI */
+	cc_add_sg_entry(dev, sg_data, 1, areq_ctx->buff_sg, curr_buff_cnt, 0,
+			false, NULL);
+	return 0;
+}
+
+void cc_unmap_cipher_request(struct device *dev, void *ctx,
+				unsigned int ivsize, struct scatterlist *src,
+				struct scatterlist *dst)
+{
+	struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx;
+
+	if (req_ctx->gen_ctx.iv_dma_addr) {
+		dev_dbg(dev, "Unmapped iv: iv_dma_addr=%pad iv_size=%u\n",
+			&req_ctx->gen_ctx.iv_dma_addr, ivsize);
+		dma_unmap_single(dev, req_ctx->gen_ctx.iv_dma_addr,
+				 ivsize,
+				 req_ctx->is_giv ? DMA_BIDIRECTIONAL :
+				 DMA_TO_DEVICE);
+	}
+	/* Release pool */
+	if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI &&
+	    req_ctx->mlli_params.mlli_virt_addr) {
+		dma_pool_free(req_ctx->mlli_params.curr_pool,
+			      req_ctx->mlli_params.mlli_virt_addr,
+			      req_ctx->mlli_params.mlli_dma_addr);
+	}
+
+	dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL);
+	dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src));
+
+	if (src != dst) {
+		dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst));
+	}
+}
+
+int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
+			  unsigned int ivsize, unsigned int nbytes,
+			  void *info, struct scatterlist *src,
+			  struct scatterlist *dst, gfp_t flags)
+{
+	struct cipher_req_ctx *req_ctx = (struct cipher_req_ctx *)ctx;
+	struct mlli_params *mlli_params = &req_ctx->mlli_params;
+	struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct buffer_array sg_data;
+	u32 dummy = 0;
+	int rc = 0;
+	u32 mapped_nents = 0;
+
+	req_ctx->dma_buf_type = CC_DMA_BUF_DLLI;
+	mlli_params->curr_pool = NULL;
+	sg_data.num_of_buffers = 0;
+
+	/* Map IV buffer */
+	if (ivsize) {
+		dump_byte_array("iv", (u8 *)info, ivsize);
+		req_ctx->gen_ctx.iv_dma_addr =
+			dma_map_single(dev, (void *)info,
+				       ivsize,
+				       req_ctx->is_giv ? DMA_BIDIRECTIONAL :
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) {
+			dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n",
+				ivsize, info);
+			return -ENOMEM;
+		}
+		dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n",
+			ivsize, info, &req_ctx->gen_ctx.iv_dma_addr);
+	} else {
+		req_ctx->gen_ctx.iv_dma_addr = 0;
+	}
+
+	/* Map the src SGL */
+	rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents,
+		       LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents);
+	if (rc) {
+		rc = -ENOMEM;
+		goto cipher_exit;
+	}
+	if (mapped_nents > 1)
+		req_ctx->dma_buf_type = CC_DMA_BUF_MLLI;
+
+	if (src == dst) {
+		/* Handle inplace operation */
+		if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) {
+			req_ctx->out_nents = 0;
+			cc_add_sg_entry(dev, &sg_data, req_ctx->in_nents, src,
+					nbytes, 0, true,
+					&req_ctx->in_mlli_nents);
+		}
+	} else {
+		/* Map the dst sg */
+		if (cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL,
+			      &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES,
+			      &dummy, &mapped_nents)) {
+			rc = -ENOMEM;
+			goto cipher_exit;
+		}
+		if (mapped_nents > 1)
+			req_ctx->dma_buf_type = CC_DMA_BUF_MLLI;
+
+		if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) {
+			cc_add_sg_entry(dev, &sg_data, req_ctx->in_nents, src,
+					nbytes, 0, true,
+					&req_ctx->in_mlli_nents);
+			cc_add_sg_entry(dev, &sg_data, req_ctx->out_nents, dst,
+					nbytes, 0, true,
+					&req_ctx->out_mlli_nents);
+		}
+	}
+
+	if (req_ctx->dma_buf_type == CC_DMA_BUF_MLLI) {
+		mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+		rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags);
+		if (rc)
+			goto cipher_exit;
+	}
+
+	dev_dbg(dev, "areq_ctx->dma_buf_type = %s\n",
+		cc_dma_buf_type(req_ctx->dma_buf_type));
+
+	return 0;
+
+cipher_exit:
+	cc_unmap_cipher_request(dev, req_ctx, ivsize, src, dst);
+	return rc;
+}
+
+void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	unsigned int hw_iv_size = areq_ctx->hw_iv_size;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+	u32 dummy;
+	bool chained;
+	u32 size_to_unmap = 0;
+
+	if (areq_ctx->mac_buf_dma_addr) {
+		dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr,
+				 MAX_MAC_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	if (areq_ctx->cipher_mode == DRV_CIPHER_GCTR) {
+		if (areq_ctx->hkey_dma_addr) {
+			dma_unmap_single(dev, areq_ctx->hkey_dma_addr,
+					 AES_BLOCK_SIZE, DMA_BIDIRECTIONAL);
+		}
+
+		if (areq_ctx->gcm_block_len_dma_addr) {
+			dma_unmap_single(dev, areq_ctx->gcm_block_len_dma_addr,
+					 AES_BLOCK_SIZE, DMA_TO_DEVICE);
+		}
+
+		if (areq_ctx->gcm_iv_inc1_dma_addr) {
+			dma_unmap_single(dev, areq_ctx->gcm_iv_inc1_dma_addr,
+					 AES_BLOCK_SIZE, DMA_TO_DEVICE);
+		}
+
+		if (areq_ctx->gcm_iv_inc2_dma_addr) {
+			dma_unmap_single(dev, areq_ctx->gcm_iv_inc2_dma_addr,
+					 AES_BLOCK_SIZE, DMA_TO_DEVICE);
+		}
+	}
+
+	if (areq_ctx->ccm_hdr_size != ccm_header_size_null) {
+		if (areq_ctx->ccm_iv0_dma_addr) {
+			dma_unmap_single(dev, areq_ctx->ccm_iv0_dma_addr,
+					 AES_BLOCK_SIZE, DMA_TO_DEVICE);
+		}
+
+		dma_unmap_sg(dev, &areq_ctx->ccm_adata_sg, 1, DMA_TO_DEVICE);
+	}
+	if (areq_ctx->gen_ctx.iv_dma_addr) {
+		dma_unmap_single(dev, areq_ctx->gen_ctx.iv_dma_addr,
+				 hw_iv_size, DMA_BIDIRECTIONAL);
+	}
+
+	/*In case a pool was set, a table was
+	 *allocated and should be released
+	 */
+	if (areq_ctx->mlli_params.curr_pool) {
+		dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%pK\n",
+			&areq_ctx->mlli_params.mlli_dma_addr,
+			areq_ctx->mlli_params.mlli_virt_addr);
+		dma_pool_free(areq_ctx->mlli_params.curr_pool,
+			      areq_ctx->mlli_params.mlli_virt_addr,
+			      areq_ctx->mlli_params.mlli_dma_addr);
+	}
+
+	dev_dbg(dev, "Unmapping src sgl: req->src=%pK areq_ctx->src.nents=%u areq_ctx->assoc.nents=%u assoclen:%u cryptlen=%u\n",
+		sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents,
+		req->assoclen, req->cryptlen);
+	size_to_unmap = req->assoclen + req->cryptlen;
+	if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT)
+		size_to_unmap += areq_ctx->req_authsize;
+	if (areq_ctx->is_gcm4543)
+		size_to_unmap += crypto_aead_ivsize(tfm);
+
+	dma_unmap_sg(dev, req->src,
+		     cc_get_sgl_nents(dev, req->src, size_to_unmap,
+				      &dummy, &chained),
+		     DMA_BIDIRECTIONAL);
+	if (req->src != req->dst) {
+		dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n",
+			sg_virt(req->dst));
+		dma_unmap_sg(dev, req->dst,
+			     cc_get_sgl_nents(dev, req->dst, size_to_unmap,
+					      &dummy, &chained),
+			     DMA_BIDIRECTIONAL);
+	}
+	if (drvdata->coherent &&
+	    areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT &&
+	    req->src == req->dst) {
+		/* copy back mac from temporary location to deal with possible
+		 * data memory overriding that caused by cache coherence
+		 * problem.
+		 */
+		cc_copy_mac(dev, req, CC_SG_FROM_BUF);
+	}
+}
+
+static int cc_get_aead_icv_nents(struct device *dev, struct scatterlist *sgl,
+				 unsigned int sgl_nents, unsigned int authsize,
+				 u32 last_entry_data_size,
+				 bool *is_icv_fragmented)
+{
+	unsigned int icv_max_size = 0;
+	unsigned int icv_required_size = authsize > last_entry_data_size ?
+					(authsize - last_entry_data_size) :
+					authsize;
+	unsigned int nents;
+	unsigned int i;
+
+	if (sgl_nents < MAX_ICV_NENTS_SUPPORTED) {
+		*is_icv_fragmented = false;
+		return 0;
+	}
+
+	for (i = 0 ; i < (sgl_nents - MAX_ICV_NENTS_SUPPORTED) ; i++) {
+		if (!sgl)
+			break;
+		sgl = sg_next(sgl);
+	}
+
+	if (sgl)
+		icv_max_size = sgl->length;
+
+	if (last_entry_data_size > authsize) {
+		/* ICV attached to data in last entry (not fragmented!) */
+		nents = 0;
+		*is_icv_fragmented = false;
+	} else if (last_entry_data_size == authsize) {
+		/* ICV placed in whole last entry (not fragmented!) */
+		nents = 1;
+		*is_icv_fragmented = false;
+	} else if (icv_max_size > icv_required_size) {
+		nents = 1;
+		*is_icv_fragmented = true;
+	} else if (icv_max_size == icv_required_size) {
+		nents = 2;
+		*is_icv_fragmented = true;
+	} else {
+		dev_err(dev, "Unsupported num. of ICV fragments (> %d)\n",
+			MAX_ICV_NENTS_SUPPORTED);
+		nents = -1; /*unsupported*/
+	}
+	dev_dbg(dev, "is_frag=%s icv_nents=%u\n",
+		(*is_icv_fragmented ? "true" : "false"), nents);
+
+	return nents;
+}
+
+static int cc_aead_chain_iv(struct cc_drvdata *drvdata,
+			    struct aead_request *req,
+			    struct buffer_array *sg_data,
+			    bool is_last, bool do_chain)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	unsigned int hw_iv_size = areq_ctx->hw_iv_size;
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc = 0;
+
+	if (!req->iv) {
+		areq_ctx->gen_ctx.iv_dma_addr = 0;
+		goto chain_iv_exit;
+	}
+
+	areq_ctx->gen_ctx.iv_dma_addr = dma_map_single(dev, req->iv,
+						       hw_iv_size,
+						       DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, areq_ctx->gen_ctx.iv_dma_addr)) {
+		dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n",
+			hw_iv_size, req->iv);
+		rc = -ENOMEM;
+		goto chain_iv_exit;
+	}
+
+	dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n",
+		hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr);
+	// TODO: what about CTR?? ask Ron
+	if (do_chain && areq_ctx->plaintext_authenticate_only) {
+		struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+		unsigned int iv_size_to_authenc = crypto_aead_ivsize(tfm);
+		unsigned int iv_ofs = GCM_BLOCK_RFC4_IV_OFFSET;
+		/* Chain to given list */
+		cc_add_buffer_entry(dev, sg_data,
+				    (areq_ctx->gen_ctx.iv_dma_addr + iv_ofs),
+				    iv_size_to_authenc, is_last,
+				    &areq_ctx->assoc.mlli_nents);
+		areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI;
+	}
+
+chain_iv_exit:
+	return rc;
+}
+
+static int cc_aead_chain_assoc(struct cc_drvdata *drvdata,
+			       struct aead_request *req,
+			       struct buffer_array *sg_data,
+			       bool is_last, bool do_chain)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	int rc = 0;
+	u32 mapped_nents = 0;
+	struct scatterlist *current_sg = req->src;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned int sg_index = 0;
+	u32 size_of_assoc = req->assoclen;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	if (areq_ctx->is_gcm4543)
+		size_of_assoc += crypto_aead_ivsize(tfm);
+
+	if (!sg_data) {
+		rc = -EINVAL;
+		goto chain_assoc_exit;
+	}
+
+	if (req->assoclen == 0) {
+		areq_ctx->assoc_buff_type = CC_DMA_BUF_NULL;
+		areq_ctx->assoc.nents = 0;
+		areq_ctx->assoc.mlli_nents = 0;
+		dev_dbg(dev, "Chain assoc of length 0: buff_type=%s nents=%u\n",
+			cc_dma_buf_type(areq_ctx->assoc_buff_type),
+			areq_ctx->assoc.nents);
+		goto chain_assoc_exit;
+	}
+
+	//iterate over the sgl to see how many entries are for associated data
+	//it is assumed that if we reach here , the sgl is already mapped
+	sg_index = current_sg->length;
+	//the first entry in the scatter list contains all the associated data
+	if (sg_index > size_of_assoc) {
+		mapped_nents++;
+	} else {
+		while (sg_index <= size_of_assoc) {
+			current_sg = sg_next(current_sg);
+			/* if have reached the end of the sgl, then this is
+			 * unexpected
+			 */
+			if (!current_sg) {
+				dev_err(dev, "reached end of sg list. unexpected\n");
+				return -EINVAL;
+			}
+			sg_index += current_sg->length;
+			mapped_nents++;
+		}
+	}
+	if (mapped_nents > LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES) {
+		dev_err(dev, "Too many fragments. current %d max %d\n",
+			mapped_nents, LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES);
+		return -ENOMEM;
+	}
+	areq_ctx->assoc.nents = mapped_nents;
+
+	/* in CCM case we have additional entry for
+	 * ccm header configurations
+	 */
+	if (areq_ctx->ccm_hdr_size != ccm_header_size_null) {
+		if ((mapped_nents + 1) > LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES) {
+			dev_err(dev, "CCM case.Too many fragments. Current %d max %d\n",
+				(areq_ctx->assoc.nents + 1),
+				LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES);
+			rc = -ENOMEM;
+			goto chain_assoc_exit;
+		}
+	}
+
+	if (mapped_nents == 1 && areq_ctx->ccm_hdr_size == ccm_header_size_null)
+		areq_ctx->assoc_buff_type = CC_DMA_BUF_DLLI;
+	else
+		areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI;
+
+	if (do_chain || areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI) {
+		dev_dbg(dev, "Chain assoc: buff_type=%s nents=%u\n",
+			cc_dma_buf_type(areq_ctx->assoc_buff_type),
+			areq_ctx->assoc.nents);
+		cc_add_sg_entry(dev, sg_data, areq_ctx->assoc.nents, req->src,
+				req->assoclen, 0, is_last,
+				&areq_ctx->assoc.mlli_nents);
+		areq_ctx->assoc_buff_type = CC_DMA_BUF_MLLI;
+	}
+
+chain_assoc_exit:
+	return rc;
+}
+
+static void cc_prepare_aead_data_dlli(struct aead_request *req,
+				      u32 *src_last_bytes, u32 *dst_last_bytes)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
+	unsigned int authsize = areq_ctx->req_authsize;
+
+	areq_ctx->is_icv_fragmented = false;
+	if (req->src == req->dst) {
+		/*INPLACE*/
+		areq_ctx->icv_dma_addr = sg_dma_address(areq_ctx->src_sgl) +
+			(*src_last_bytes - authsize);
+		areq_ctx->icv_virt_addr = sg_virt(areq_ctx->src_sgl) +
+			(*src_last_bytes - authsize);
+	} else if (direct == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		/*NON-INPLACE and DECRYPT*/
+		areq_ctx->icv_dma_addr = sg_dma_address(areq_ctx->src_sgl) +
+			(*src_last_bytes - authsize);
+		areq_ctx->icv_virt_addr = sg_virt(areq_ctx->src_sgl) +
+			(*src_last_bytes - authsize);
+	} else {
+		/*NON-INPLACE and ENCRYPT*/
+		areq_ctx->icv_dma_addr = sg_dma_address(areq_ctx->dst_sgl) +
+			(*dst_last_bytes - authsize);
+		areq_ctx->icv_virt_addr = sg_virt(areq_ctx->dst_sgl) +
+			(*dst_last_bytes - authsize);
+	}
+}
+
+static int cc_prepare_aead_data_mlli(struct cc_drvdata *drvdata,
+				     struct aead_request *req,
+				     struct buffer_array *sg_data,
+				     u32 *src_last_bytes, u32 *dst_last_bytes,
+				     bool is_last_table)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
+	unsigned int authsize = areq_ctx->req_authsize;
+	int rc = 0, icv_nents;
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct scatterlist *sg;
+
+	if (req->src == req->dst) {
+		/*INPLACE*/
+		cc_add_sg_entry(dev, sg_data, areq_ctx->src.nents,
+				areq_ctx->src_sgl, areq_ctx->cryptlen,
+				areq_ctx->src_offset, is_last_table,
+				&areq_ctx->src.mlli_nents);
+
+		icv_nents = cc_get_aead_icv_nents(dev, areq_ctx->src_sgl,
+						  areq_ctx->src.nents,
+						  authsize, *src_last_bytes,
+						  &areq_ctx->is_icv_fragmented);
+		if (icv_nents < 0) {
+			rc = -ENOTSUPP;
+			goto prepare_data_mlli_exit;
+		}
+
+		if (areq_ctx->is_icv_fragmented) {
+			/* Backup happens only when ICV is fragmented, ICV
+			 * verification is made by CPU compare in order to
+			 * simplify MAC verification upon request completion
+			 */
+			if (direct == DRV_CRYPTO_DIRECTION_DECRYPT) {
+				/* In coherent platforms (e.g. ACP)
+				 * already copying ICV for any
+				 * INPLACE-DECRYPT operation, hence
+				 * we must neglect this code.
+				 */
+				if (!drvdata->coherent)
+					cc_copy_mac(dev, req, CC_SG_TO_BUF);
+
+				areq_ctx->icv_virt_addr = areq_ctx->backup_mac;
+			} else {
+				areq_ctx->icv_virt_addr = areq_ctx->mac_buf;
+				areq_ctx->icv_dma_addr =
+					areq_ctx->mac_buf_dma_addr;
+			}
+		} else { /* Contig. ICV */
+			sg = &areq_ctx->src_sgl[areq_ctx->src.nents - 1];
+			/*Should hanlde if the sg is not contig.*/
+			areq_ctx->icv_dma_addr = sg_dma_address(sg) +
+				(*src_last_bytes - authsize);
+			areq_ctx->icv_virt_addr = sg_virt(sg) +
+				(*src_last_bytes - authsize);
+		}
+
+	} else if (direct == DRV_CRYPTO_DIRECTION_DECRYPT) {
+		/*NON-INPLACE and DECRYPT*/
+		cc_add_sg_entry(dev, sg_data, areq_ctx->src.nents,
+				areq_ctx->src_sgl, areq_ctx->cryptlen,
+				areq_ctx->src_offset, is_last_table,
+				&areq_ctx->src.mlli_nents);
+		cc_add_sg_entry(dev, sg_data, areq_ctx->dst.nents,
+				areq_ctx->dst_sgl, areq_ctx->cryptlen,
+				areq_ctx->dst_offset, is_last_table,
+				&areq_ctx->dst.mlli_nents);
+
+		icv_nents = cc_get_aead_icv_nents(dev, areq_ctx->src_sgl,
+						  areq_ctx->src.nents,
+						  authsize, *src_last_bytes,
+						  &areq_ctx->is_icv_fragmented);
+		if (icv_nents < 0) {
+			rc = -ENOTSUPP;
+			goto prepare_data_mlli_exit;
+		}
+
+		/* Backup happens only when ICV is fragmented, ICV
+		 * verification is made by CPU compare in order to simplify
+		 * MAC verification upon request completion
+		 */
+		if (areq_ctx->is_icv_fragmented) {
+			cc_copy_mac(dev, req, CC_SG_TO_BUF);
+			areq_ctx->icv_virt_addr = areq_ctx->backup_mac;
+
+		} else { /* Contig. ICV */
+			sg = &areq_ctx->src_sgl[areq_ctx->src.nents - 1];
+			/*Should hanlde if the sg is not contig.*/
+			areq_ctx->icv_dma_addr = sg_dma_address(sg) +
+				(*src_last_bytes - authsize);
+			areq_ctx->icv_virt_addr = sg_virt(sg) +
+				(*src_last_bytes - authsize);
+		}
+
+	} else {
+		/*NON-INPLACE and ENCRYPT*/
+		cc_add_sg_entry(dev, sg_data, areq_ctx->dst.nents,
+				areq_ctx->dst_sgl, areq_ctx->cryptlen,
+				areq_ctx->dst_offset, is_last_table,
+				&areq_ctx->dst.mlli_nents);
+		cc_add_sg_entry(dev, sg_data, areq_ctx->src.nents,
+				areq_ctx->src_sgl, areq_ctx->cryptlen,
+				areq_ctx->src_offset, is_last_table,
+				&areq_ctx->src.mlli_nents);
+
+		icv_nents = cc_get_aead_icv_nents(dev, areq_ctx->dst_sgl,
+						  areq_ctx->dst.nents,
+						  authsize, *dst_last_bytes,
+						  &areq_ctx->is_icv_fragmented);
+		if (icv_nents < 0) {
+			rc = -ENOTSUPP;
+			goto prepare_data_mlli_exit;
+		}
+
+		if (!areq_ctx->is_icv_fragmented) {
+			sg = &areq_ctx->dst_sgl[areq_ctx->dst.nents - 1];
+			/* Contig. ICV */
+			areq_ctx->icv_dma_addr = sg_dma_address(sg) +
+				(*dst_last_bytes - authsize);
+			areq_ctx->icv_virt_addr = sg_virt(sg) +
+				(*dst_last_bytes - authsize);
+		} else {
+			areq_ctx->icv_dma_addr = areq_ctx->mac_buf_dma_addr;
+			areq_ctx->icv_virt_addr = areq_ctx->mac_buf;
+		}
+	}
+
+prepare_data_mlli_exit:
+	return rc;
+}
+
+static int cc_aead_chain_data(struct cc_drvdata *drvdata,
+			      struct aead_request *req,
+			      struct buffer_array *sg_data,
+			      bool is_last_table, bool do_chain)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct device *dev = drvdata_to_dev(drvdata);
+	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
+	unsigned int authsize = areq_ctx->req_authsize;
+	unsigned int src_last_bytes = 0, dst_last_bytes = 0;
+	int rc = 0;
+	u32 src_mapped_nents = 0, dst_mapped_nents = 0;
+	u32 offset = 0;
+	/* non-inplace mode */
+	unsigned int size_for_map = req->assoclen + req->cryptlen;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	u32 sg_index = 0;
+	bool chained = false;
+	bool is_gcm4543 = areq_ctx->is_gcm4543;
+	u32 size_to_skip = req->assoclen;
+
+	if (is_gcm4543)
+		size_to_skip += crypto_aead_ivsize(tfm);
+
+	offset = size_to_skip;
+
+	if (!sg_data)
+		return -EINVAL;
+
+	areq_ctx->src_sgl = req->src;
+	areq_ctx->dst_sgl = req->dst;
+
+	if (is_gcm4543)
+		size_for_map += crypto_aead_ivsize(tfm);
+
+	size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+			authsize : 0;
+	src_mapped_nents = cc_get_sgl_nents(dev, req->src, size_for_map,
+					    &src_last_bytes, &chained);
+	sg_index = areq_ctx->src_sgl->length;
+	//check where the data starts
+	while (sg_index <= size_to_skip) {
+		offset -= areq_ctx->src_sgl->length;
+		areq_ctx->src_sgl = sg_next(areq_ctx->src_sgl);
+		//if have reached the end of the sgl, then this is unexpected
+		if (!areq_ctx->src_sgl) {
+			dev_err(dev, "reached end of sg list. unexpected\n");
+			return -EINVAL;
+		}
+		sg_index += areq_ctx->src_sgl->length;
+		src_mapped_nents--;
+	}
+	if (src_mapped_nents > LLI_MAX_NUM_OF_DATA_ENTRIES) {
+		dev_err(dev, "Too many fragments. current %d max %d\n",
+			src_mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES);
+		return -ENOMEM;
+	}
+
+	areq_ctx->src.nents = src_mapped_nents;
+
+	areq_ctx->src_offset = offset;
+
+	if (req->src != req->dst) {
+		size_for_map = req->assoclen + req->cryptlen;
+		size_for_map += (direct == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+				authsize : 0;
+		if (is_gcm4543)
+			size_for_map += crypto_aead_ivsize(tfm);
+
+		rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL,
+			       &areq_ctx->dst.nents,
+			       LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes,
+			       &dst_mapped_nents);
+		if (rc) {
+			rc = -ENOMEM;
+			goto chain_data_exit;
+		}
+	}
+
+	dst_mapped_nents = cc_get_sgl_nents(dev, req->dst, size_for_map,
+					    &dst_last_bytes, &chained);
+	sg_index = areq_ctx->dst_sgl->length;
+	offset = size_to_skip;
+
+	//check where the data starts
+	while (sg_index <= size_to_skip) {
+		offset -= areq_ctx->dst_sgl->length;
+		areq_ctx->dst_sgl = sg_next(areq_ctx->dst_sgl);
+		//if have reached the end of the sgl, then this is unexpected
+		if (!areq_ctx->dst_sgl) {
+			dev_err(dev, "reached end of sg list. unexpected\n");
+			return -EINVAL;
+		}
+		sg_index += areq_ctx->dst_sgl->length;
+		dst_mapped_nents--;
+	}
+	if (dst_mapped_nents > LLI_MAX_NUM_OF_DATA_ENTRIES) {
+		dev_err(dev, "Too many fragments. current %d max %d\n",
+			dst_mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES);
+		return -ENOMEM;
+	}
+	areq_ctx->dst.nents = dst_mapped_nents;
+	areq_ctx->dst_offset = offset;
+	if (src_mapped_nents > 1 ||
+	    dst_mapped_nents  > 1 ||
+	    do_chain) {
+		areq_ctx->data_buff_type = CC_DMA_BUF_MLLI;
+		rc = cc_prepare_aead_data_mlli(drvdata, req, sg_data,
+					       &src_last_bytes,
+					       &dst_last_bytes, is_last_table);
+	} else {
+		areq_ctx->data_buff_type = CC_DMA_BUF_DLLI;
+		cc_prepare_aead_data_dlli(req, &src_last_bytes,
+					  &dst_last_bytes);
+	}
+
+chain_data_exit:
+	return rc;
+}
+
+static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata,
+				      struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	u32 curr_mlli_size = 0;
+
+	if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI) {
+		areq_ctx->assoc.sram_addr = drvdata->mlli_sram_addr;
+		curr_mlli_size = areq_ctx->assoc.mlli_nents *
+						LLI_ENTRY_BYTE_SIZE;
+	}
+
+	if (areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) {
+		/*Inplace case dst nents equal to src nents*/
+		if (req->src == req->dst) {
+			areq_ctx->dst.mlli_nents = areq_ctx->src.mlli_nents;
+			areq_ctx->src.sram_addr = drvdata->mlli_sram_addr +
+								curr_mlli_size;
+			areq_ctx->dst.sram_addr = areq_ctx->src.sram_addr;
+			if (!areq_ctx->is_single_pass)
+				areq_ctx->assoc.mlli_nents +=
+					areq_ctx->src.mlli_nents;
+		} else {
+			if (areq_ctx->gen_ctx.op_type ==
+					DRV_CRYPTO_DIRECTION_DECRYPT) {
+				areq_ctx->src.sram_addr =
+						drvdata->mlli_sram_addr +
+								curr_mlli_size;
+				areq_ctx->dst.sram_addr =
+						areq_ctx->src.sram_addr +
+						areq_ctx->src.mlli_nents *
+						LLI_ENTRY_BYTE_SIZE;
+				if (!areq_ctx->is_single_pass)
+					areq_ctx->assoc.mlli_nents +=
+						areq_ctx->src.mlli_nents;
+			} else {
+				areq_ctx->dst.sram_addr =
+						drvdata->mlli_sram_addr +
+								curr_mlli_size;
+				areq_ctx->src.sram_addr =
+						areq_ctx->dst.sram_addr +
+						areq_ctx->dst.mlli_nents *
+						LLI_ENTRY_BYTE_SIZE;
+				if (!areq_ctx->is_single_pass)
+					areq_ctx->assoc.mlli_nents +=
+						areq_ctx->dst.mlli_nents;
+			}
+		}
+	}
+}
+
+int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
+{
+	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct mlli_params *mlli_params = &areq_ctx->mlli_params;
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct buffer_array sg_data;
+	unsigned int authsize = areq_ctx->req_authsize;
+	struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
+	int rc = 0;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	bool is_gcm4543 = areq_ctx->is_gcm4543;
+	dma_addr_t dma_addr;
+	u32 mapped_nents = 0;
+	u32 dummy = 0; /*used for the assoc data fragments */
+	u32 size_to_map = 0;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	mlli_params->curr_pool = NULL;
+	sg_data.num_of_buffers = 0;
+
+	/* copy mac to a temporary location to deal with possible
+	 * data memory overriding that caused by cache coherence problem.
+	 */
+	if (drvdata->coherent &&
+	    areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT &&
+	    req->src == req->dst)
+		cc_copy_mac(dev, req, CC_SG_TO_BUF);
+
+	/* cacluate the size for cipher remove ICV in decrypt*/
+	areq_ctx->cryptlen = (areq_ctx->gen_ctx.op_type ==
+				 DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+				req->cryptlen :
+				(req->cryptlen - authsize);
+
+	dma_addr = dma_map_single(dev, areq_ctx->mac_buf, MAX_MAC_SIZE,
+				  DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma_addr)) {
+		dev_err(dev, "Mapping mac_buf %u B at va=%pK for DMA failed\n",
+			MAX_MAC_SIZE, areq_ctx->mac_buf);
+		rc = -ENOMEM;
+		goto aead_map_failure;
+	}
+	areq_ctx->mac_buf_dma_addr = dma_addr;
+
+	if (areq_ctx->ccm_hdr_size != ccm_header_size_null) {
+		void *addr = areq_ctx->ccm_config + CCM_CTR_COUNT_0_OFFSET;
+
+		dma_addr = dma_map_single(dev, addr, AES_BLOCK_SIZE,
+					  DMA_TO_DEVICE);
+
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_err(dev, "Mapping mac_buf %u B at va=%pK for DMA failed\n",
+				AES_BLOCK_SIZE, addr);
+			areq_ctx->ccm_iv0_dma_addr = 0;
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+		areq_ctx->ccm_iv0_dma_addr = dma_addr;
+
+		if (cc_set_aead_conf_buf(dev, areq_ctx, areq_ctx->ccm_config,
+					 &sg_data, req->assoclen)) {
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+	}
+
+	if (areq_ctx->cipher_mode == DRV_CIPHER_GCTR) {
+		dma_addr = dma_map_single(dev, areq_ctx->hkey, AES_BLOCK_SIZE,
+					  DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_err(dev, "Mapping hkey %u B at va=%pK for DMA failed\n",
+				AES_BLOCK_SIZE, areq_ctx->hkey);
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+		areq_ctx->hkey_dma_addr = dma_addr;
+
+		dma_addr = dma_map_single(dev, &areq_ctx->gcm_len_block,
+					  AES_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_err(dev, "Mapping gcm_len_block %u B at va=%pK for DMA failed\n",
+				AES_BLOCK_SIZE, &areq_ctx->gcm_len_block);
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+		areq_ctx->gcm_block_len_dma_addr = dma_addr;
+
+		dma_addr = dma_map_single(dev, areq_ctx->gcm_iv_inc1,
+					  AES_BLOCK_SIZE, DMA_TO_DEVICE);
+
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_err(dev, "Mapping gcm_iv_inc1 %u B at va=%pK for DMA failed\n",
+				AES_BLOCK_SIZE, (areq_ctx->gcm_iv_inc1));
+			areq_ctx->gcm_iv_inc1_dma_addr = 0;
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+		areq_ctx->gcm_iv_inc1_dma_addr = dma_addr;
+
+		dma_addr = dma_map_single(dev, areq_ctx->gcm_iv_inc2,
+					  AES_BLOCK_SIZE, DMA_TO_DEVICE);
+
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_err(dev, "Mapping gcm_iv_inc2 %u B at va=%pK for DMA failed\n",
+				AES_BLOCK_SIZE, (areq_ctx->gcm_iv_inc2));
+			areq_ctx->gcm_iv_inc2_dma_addr = 0;
+			rc = -ENOMEM;
+			goto aead_map_failure;
+		}
+		areq_ctx->gcm_iv_inc2_dma_addr = dma_addr;
+	}
+
+	size_to_map = req->cryptlen + req->assoclen;
+	if (areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_ENCRYPT)
+		size_to_map += authsize;
+
+	if (is_gcm4543)
+		size_to_map += crypto_aead_ivsize(tfm);
+	rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL,
+		       &areq_ctx->src.nents,
+		       (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES +
+			LLI_MAX_NUM_OF_DATA_ENTRIES),
+		       &dummy, &mapped_nents);
+	if (rc) {
+		rc = -ENOMEM;
+		goto aead_map_failure;
+	}
+
+	if (areq_ctx->is_single_pass) {
+		/*
+		 * Create MLLI table for:
+		 *   (1) Assoc. data
+		 *   (2) Src/Dst SGLs
+		 *   Note: IV is contg. buffer (not an SGL)
+		 */
+		rc = cc_aead_chain_assoc(drvdata, req, &sg_data, true, false);
+		if (rc)
+			goto aead_map_failure;
+		rc = cc_aead_chain_iv(drvdata, req, &sg_data, true, false);
+		if (rc)
+			goto aead_map_failure;
+		rc = cc_aead_chain_data(drvdata, req, &sg_data, true, false);
+		if (rc)
+			goto aead_map_failure;
+	} else { /* DOUBLE-PASS flow */
+		/*
+		 * Prepare MLLI table(s) in this order:
+		 *
+		 * If ENCRYPT/DECRYPT (inplace):
+		 *   (1) MLLI table for assoc
+		 *   (2) IV entry (chained right after end of assoc)
+		 *   (3) MLLI for src/dst (inplace operation)
+		 *
+		 * If ENCRYPT (non-inplace)
+		 *   (1) MLLI table for assoc
+		 *   (2) IV entry (chained right after end of assoc)
+		 *   (3) MLLI for dst
+		 *   (4) MLLI for src
+		 *
+		 * If DECRYPT (non-inplace)
+		 *   (1) MLLI table for assoc
+		 *   (2) IV entry (chained right after end of assoc)
+		 *   (3) MLLI for src
+		 *   (4) MLLI for dst
+		 */
+		rc = cc_aead_chain_assoc(drvdata, req, &sg_data, false, true);
+		if (rc)
+			goto aead_map_failure;
+		rc = cc_aead_chain_iv(drvdata, req, &sg_data, false, true);
+		if (rc)
+			goto aead_map_failure;
+		rc = cc_aead_chain_data(drvdata, req, &sg_data, true, true);
+		if (rc)
+			goto aead_map_failure;
+	}
+
+	/* Mlli support -start building the MLLI according to the above
+	 * results
+	 */
+	if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI ||
+	    areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) {
+		mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+		rc = cc_generate_mlli(dev, &sg_data, mlli_params, flags);
+		if (rc)
+			goto aead_map_failure;
+
+		cc_update_aead_mlli_nents(drvdata, req);
+		dev_dbg(dev, "assoc params mn %d\n",
+			areq_ctx->assoc.mlli_nents);
+		dev_dbg(dev, "src params mn %d\n", areq_ctx->src.mlli_nents);
+		dev_dbg(dev, "dst params mn %d\n", areq_ctx->dst.mlli_nents);
+	}
+	return 0;
+
+aead_map_failure:
+	cc_unmap_aead_request(dev, req);
+	return rc;
+}
+
+int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx,
+			      struct scatterlist *src, unsigned int nbytes,
+			      bool do_update, gfp_t flags)
+{
+	struct ahash_req_ctx *areq_ctx = (struct ahash_req_ctx *)ctx;
+	struct device *dev = drvdata_to_dev(drvdata);
+	u8 *curr_buff = cc_hash_buf(areq_ctx);
+	u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx);
+	struct mlli_params *mlli_params = &areq_ctx->mlli_params;
+	struct buffer_array sg_data;
+	struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
+	u32 dummy = 0;
+	u32 mapped_nents = 0;
+
+	dev_dbg(dev, "final params : curr_buff=%pK curr_buff_cnt=0x%X nbytes = 0x%X src=%pK curr_index=%u\n",
+		curr_buff, *curr_buff_cnt, nbytes, src, areq_ctx->buff_index);
+	/* Init the type of the dma buffer */
+	areq_ctx->data_dma_buf_type = CC_DMA_BUF_NULL;
+	mlli_params->curr_pool = NULL;
+	sg_data.num_of_buffers = 0;
+	areq_ctx->in_nents = 0;
+
+	if (nbytes == 0 && *curr_buff_cnt == 0) {
+		/* nothing to do */
+		return 0;
+	}
+
+	/*TODO: copy data in case that buffer is enough for operation */
+	/* map the previous buffer */
+	if (*curr_buff_cnt) {
+		if (cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt,
+				    &sg_data)) {
+			return -ENOMEM;
+		}
+	}
+
+	if (src && nbytes > 0 && do_update) {
+		if (cc_map_sg(dev, src, nbytes, DMA_TO_DEVICE,
+			      &areq_ctx->in_nents, LLI_MAX_NUM_OF_DATA_ENTRIES,
+			      &dummy, &mapped_nents)) {
+			goto unmap_curr_buff;
+		}
+		if (src && mapped_nents == 1 &&
+		    areq_ctx->data_dma_buf_type == CC_DMA_BUF_NULL) {
+			memcpy(areq_ctx->buff_sg, src,
+			       sizeof(struct scatterlist));
+			areq_ctx->buff_sg->length = nbytes;
+			areq_ctx->curr_sg = areq_ctx->buff_sg;
+			areq_ctx->data_dma_buf_type = CC_DMA_BUF_DLLI;
+		} else {
+			areq_ctx->data_dma_buf_type = CC_DMA_BUF_MLLI;
+		}
+	}
+
+	/*build mlli */
+	if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) {
+		mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+		/* add the src data to the sg_data */
+		cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src, nbytes,
+				0, true, &areq_ctx->mlli_nents);
+		if (cc_generate_mlli(dev, &sg_data, mlli_params, flags))
+			goto fail_unmap_din;
+	}
+	/* change the buffer index for the unmap function */
+	areq_ctx->buff_index = (areq_ctx->buff_index ^ 1);
+	dev_dbg(dev, "areq_ctx->data_dma_buf_type = %s\n",
+		cc_dma_buf_type(areq_ctx->data_dma_buf_type));
+	return 0;
+
+fail_unmap_din:
+	dma_unmap_sg(dev, src, areq_ctx->in_nents, DMA_TO_DEVICE);
+
+unmap_curr_buff:
+	if (*curr_buff_cnt)
+		dma_unmap_sg(dev, areq_ctx->buff_sg, 1, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx,
+			       struct scatterlist *src, unsigned int nbytes,
+			       unsigned int block_size, gfp_t flags)
+{
+	struct ahash_req_ctx *areq_ctx = (struct ahash_req_ctx *)ctx;
+	struct device *dev = drvdata_to_dev(drvdata);
+	u8 *curr_buff = cc_hash_buf(areq_ctx);
+	u32 *curr_buff_cnt = cc_hash_buf_cnt(areq_ctx);
+	u8 *next_buff = cc_next_buf(areq_ctx);
+	u32 *next_buff_cnt = cc_next_buf_cnt(areq_ctx);
+	struct mlli_params *mlli_params = &areq_ctx->mlli_params;
+	unsigned int update_data_len;
+	u32 total_in_len = nbytes + *curr_buff_cnt;
+	struct buffer_array sg_data;
+	struct buff_mgr_handle *buff_mgr = drvdata->buff_mgr_handle;
+	unsigned int swap_index = 0;
+	u32 dummy = 0;
+	u32 mapped_nents = 0;
+
+	dev_dbg(dev, " update params : curr_buff=%pK curr_buff_cnt=0x%X nbytes=0x%X src=%pK curr_index=%u\n",
+		curr_buff, *curr_buff_cnt, nbytes, src, areq_ctx->buff_index);
+	/* Init the type of the dma buffer */
+	areq_ctx->data_dma_buf_type = CC_DMA_BUF_NULL;
+	mlli_params->curr_pool = NULL;
+	areq_ctx->curr_sg = NULL;
+	sg_data.num_of_buffers = 0;
+	areq_ctx->in_nents = 0;
+
+	if (total_in_len < block_size) {
+		dev_dbg(dev, " less than one block: curr_buff=%pK *curr_buff_cnt=0x%X copy_to=%pK\n",
+			curr_buff, *curr_buff_cnt, &curr_buff[*curr_buff_cnt]);
+		areq_ctx->in_nents =
+			cc_get_sgl_nents(dev, src, nbytes, &dummy, NULL);
+		sg_copy_to_buffer(src, areq_ctx->in_nents,
+				  &curr_buff[*curr_buff_cnt], nbytes);
+		*curr_buff_cnt += nbytes;
+		return 1;
+	}
+
+	/* Calculate the residue size*/
+	*next_buff_cnt = total_in_len & (block_size - 1);
+	/* update data len */
+	update_data_len = total_in_len - *next_buff_cnt;
+
+	dev_dbg(dev, " temp length : *next_buff_cnt=0x%X update_data_len=0x%X\n",
+		*next_buff_cnt, update_data_len);
+
+	/* Copy the new residue to next buffer */
+	if (*next_buff_cnt) {
+		dev_dbg(dev, " handle residue: next buff %pK skip data %u residue %u\n",
+			next_buff, (update_data_len - *curr_buff_cnt),
+			*next_buff_cnt);
+		cc_copy_sg_portion(dev, next_buff, src,
+				   (update_data_len - *curr_buff_cnt),
+				   nbytes, CC_SG_TO_BUF);
+		/* change the buffer index for next operation */
+		swap_index = 1;
+	}
+
+	if (*curr_buff_cnt) {
+		if (cc_set_hash_buf(dev, areq_ctx, curr_buff, *curr_buff_cnt,
+				    &sg_data)) {
+			return -ENOMEM;
+		}
+		/* change the buffer index for next operation */
+		swap_index = 1;
+	}
+
+	if (update_data_len > *curr_buff_cnt) {
+		if (cc_map_sg(dev, src, (update_data_len - *curr_buff_cnt),
+			      DMA_TO_DEVICE, &areq_ctx->in_nents,
+			      LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy,
+			      &mapped_nents)) {
+			goto unmap_curr_buff;
+		}
+		if (mapped_nents == 1 &&
+		    areq_ctx->data_dma_buf_type == CC_DMA_BUF_NULL) {
+			/* only one entry in the SG and no previous data */
+			memcpy(areq_ctx->buff_sg, src,
+			       sizeof(struct scatterlist));
+			areq_ctx->buff_sg->length = update_data_len;
+			areq_ctx->data_dma_buf_type = CC_DMA_BUF_DLLI;
+			areq_ctx->curr_sg = areq_ctx->buff_sg;
+		} else {
+			areq_ctx->data_dma_buf_type = CC_DMA_BUF_MLLI;
+		}
+	}
+
+	if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_MLLI) {
+		mlli_params->curr_pool = buff_mgr->mlli_buffs_pool;
+		/* add the src data to the sg_data */
+		cc_add_sg_entry(dev, &sg_data, areq_ctx->in_nents, src,
+				(update_data_len - *curr_buff_cnt), 0, true,
+				&areq_ctx->mlli_nents);
+		if (cc_generate_mlli(dev, &sg_data, mlli_params, flags))
+			goto fail_unmap_din;
+	}
+	areq_ctx->buff_index = (areq_ctx->buff_index ^ swap_index);
+
+	return 0;
+
+fail_unmap_din:
+	dma_unmap_sg(dev, src, areq_ctx->in_nents, DMA_TO_DEVICE);
+
+unmap_curr_buff:
+	if (*curr_buff_cnt)
+		dma_unmap_sg(dev, areq_ctx->buff_sg, 1, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+void cc_unmap_hash_request(struct device *dev, void *ctx,
+			   struct scatterlist *src, bool do_revert)
+{
+	struct ahash_req_ctx *areq_ctx = (struct ahash_req_ctx *)ctx;
+	u32 *prev_len = cc_next_buf_cnt(areq_ctx);
+
+	/*In case a pool was set, a table was
+	 *allocated and should be released
+	 */
+	if (areq_ctx->mlli_params.curr_pool) {
+		dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%pK\n",
+			&areq_ctx->mlli_params.mlli_dma_addr,
+			areq_ctx->mlli_params.mlli_virt_addr);
+		dma_pool_free(areq_ctx->mlli_params.curr_pool,
+			      areq_ctx->mlli_params.mlli_virt_addr,
+			      areq_ctx->mlli_params.mlli_dma_addr);
+	}
+
+	if (src && areq_ctx->in_nents) {
+		dev_dbg(dev, "Unmapped sg src: virt=%pK dma=%pad len=0x%X\n",
+			sg_virt(src), &sg_dma_address(src), sg_dma_len(src));
+		dma_unmap_sg(dev, src,
+			     areq_ctx->in_nents, DMA_TO_DEVICE);
+	}
+
+	if (*prev_len) {
+		dev_dbg(dev, "Unmapped buffer: areq_ctx->buff_sg=%pK dma=%pad len 0x%X\n",
+			sg_virt(areq_ctx->buff_sg),
+			&sg_dma_address(areq_ctx->buff_sg),
+			sg_dma_len(areq_ctx->buff_sg));
+		dma_unmap_sg(dev, areq_ctx->buff_sg, 1, DMA_TO_DEVICE);
+		if (!do_revert) {
+			/* clean the previous data length for update
+			 * operation
+			 */
+			*prev_len = 0;
+		} else {
+			areq_ctx->buff_index ^= 1;
+		}
+	}
+}
+
+int cc_buffer_mgr_init(struct cc_drvdata *drvdata)
+{
+	struct buff_mgr_handle *buff_mgr_handle;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	buff_mgr_handle = kmalloc(sizeof(*buff_mgr_handle), GFP_KERNEL);
+	if (!buff_mgr_handle)
+		return -ENOMEM;
+
+	drvdata->buff_mgr_handle = buff_mgr_handle;
+
+	buff_mgr_handle->mlli_buffs_pool =
+		dma_pool_create("dx_single_mlli_tables", dev,
+				MAX_NUM_OF_TOTAL_MLLI_ENTRIES *
+				LLI_ENTRY_BYTE_SIZE,
+				MLLI_TABLE_MIN_ALIGNMENT, 0);
+
+	if (!buff_mgr_handle->mlli_buffs_pool)
+		goto error;
+
+	return 0;
+
+error:
+	cc_buffer_mgr_fini(drvdata);
+	return -ENOMEM;
+}
+
+int cc_buffer_mgr_fini(struct cc_drvdata *drvdata)
+{
+	struct buff_mgr_handle *buff_mgr_handle = drvdata->buff_mgr_handle;
+
+	if (buff_mgr_handle) {
+		dma_pool_destroy(buff_mgr_handle->mlli_buffs_pool);
+		kfree(drvdata->buff_mgr_handle);
+		drvdata->buff_mgr_handle = NULL;
+	}
+	return 0;
+}
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h
new file mode 100644
index 000000000000..3ec4b4db5247
--- /dev/null
+++ b/drivers/crypto/ccree/cc_buffer_mgr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_buffer_mgr.h
+ * Buffer Manager
+ */
+
+#ifndef __CC_BUFFER_MGR_H__
+#define __CC_BUFFER_MGR_H__
+
+#include <crypto/algapi.h>
+
+#include "cc_driver.h"
+
+enum cc_req_dma_buf_type {
+	CC_DMA_BUF_NULL = 0,
+	CC_DMA_BUF_DLLI,
+	CC_DMA_BUF_MLLI
+};
+
+enum cc_sg_cpy_direct {
+	CC_SG_TO_BUF = 0,
+	CC_SG_FROM_BUF = 1
+};
+
+struct cc_mlli {
+	cc_sram_addr_t sram_addr;
+	unsigned int nents; //sg nents
+	unsigned int mlli_nents; //mlli nents might be different than the above
+};
+
+struct mlli_params {
+	struct dma_pool *curr_pool;
+	u8 *mlli_virt_addr;
+	dma_addr_t mlli_dma_addr;
+	u32 mlli_len;
+};
+
+int cc_buffer_mgr_init(struct cc_drvdata *drvdata);
+
+int cc_buffer_mgr_fini(struct cc_drvdata *drvdata);
+
+int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx,
+			  unsigned int ivsize, unsigned int nbytes,
+			  void *info, struct scatterlist *src,
+			  struct scatterlist *dst, gfp_t flags);
+
+void cc_unmap_cipher_request(struct device *dev, void *ctx, unsigned int ivsize,
+			     struct scatterlist *src, struct scatterlist *dst);
+
+int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req);
+
+void cc_unmap_aead_request(struct device *dev, struct aead_request *req);
+
+int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx,
+			      struct scatterlist *src, unsigned int nbytes,
+			      bool do_update, gfp_t flags);
+
+int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx,
+			       struct scatterlist *src, unsigned int nbytes,
+			       unsigned int block_size, gfp_t flags);
+
+void cc_unmap_hash_request(struct device *dev, void *ctx,
+			   struct scatterlist *src, bool do_revert);
+
+void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg,
+			u32 to_skip, u32 end, enum cc_sg_cpy_direct direct);
+
+void cc_zero_sgl(struct scatterlist *sgl, u32 data_len);
+
+#endif /*__BUFFER_MGR_H__*/
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
new file mode 100644
index 000000000000..df98f7afe645
--- /dev/null
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -0,0 +1,1150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/des.h>
+#include <crypto/xts.h>
+#include <crypto/scatterwalk.h>
+
+#include "cc_driver.h"
+#include "cc_lli_defs.h"
+#include "cc_buffer_mgr.h"
+#include "cc_cipher.h"
+#include "cc_request_mgr.h"
+
+#define MAX_ABLKCIPHER_SEQ_LEN 6
+
+#define template_skcipher	template_u.skcipher
+
+#define CC_MIN_AES_XTS_SIZE 0x10
+#define CC_MAX_AES_XTS_SIZE 0x2000
+struct cc_cipher_handle {
+	struct list_head alg_list;
+};
+
+struct cc_user_key_info {
+	u8 *key;
+	dma_addr_t key_dma_addr;
+};
+
+struct cc_hw_key_info {
+	enum cc_hw_crypto_key key1_slot;
+	enum cc_hw_crypto_key key2_slot;
+};
+
+struct cc_cipher_ctx {
+	struct cc_drvdata *drvdata;
+	int keylen;
+	int key_round_number;
+	int cipher_mode;
+	int flow_mode;
+	unsigned int flags;
+	struct cc_user_key_info user;
+	struct cc_hw_key_info hw;
+	struct crypto_shash *shash_tfm;
+};
+
+static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
+
+static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
+{
+	switch (ctx_p->flow_mode) {
+	case S_DIN_to_AES:
+		switch (size) {
+		case CC_AES_128_BIT_KEY_SIZE:
+		case CC_AES_192_BIT_KEY_SIZE:
+			if (ctx_p->cipher_mode != DRV_CIPHER_XTS &&
+			    ctx_p->cipher_mode != DRV_CIPHER_ESSIV &&
+			    ctx_p->cipher_mode != DRV_CIPHER_BITLOCKER)
+				return 0;
+			break;
+		case CC_AES_256_BIT_KEY_SIZE:
+			return 0;
+		case (CC_AES_192_BIT_KEY_SIZE * 2):
+		case (CC_AES_256_BIT_KEY_SIZE * 2):
+			if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
+			    ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
+			    ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER)
+				return 0;
+			break;
+		default:
+			break;
+		}
+	case S_DIN_to_DES:
+		if (size == DES3_EDE_KEY_SIZE || size == DES_KEY_SIZE)
+			return 0;
+		break;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+static int validate_data_size(struct cc_cipher_ctx *ctx_p,
+			      unsigned int size)
+{
+	switch (ctx_p->flow_mode) {
+	case S_DIN_to_AES:
+		switch (ctx_p->cipher_mode) {
+		case DRV_CIPHER_XTS:
+			if (size >= CC_MIN_AES_XTS_SIZE &&
+			    size <= CC_MAX_AES_XTS_SIZE &&
+			    IS_ALIGNED(size, AES_BLOCK_SIZE))
+				return 0;
+			break;
+		case DRV_CIPHER_CBC_CTS:
+			if (size >= AES_BLOCK_SIZE)
+				return 0;
+			break;
+		case DRV_CIPHER_OFB:
+		case DRV_CIPHER_CTR:
+				return 0;
+		case DRV_CIPHER_ECB:
+		case DRV_CIPHER_CBC:
+		case DRV_CIPHER_ESSIV:
+		case DRV_CIPHER_BITLOCKER:
+			if (IS_ALIGNED(size, AES_BLOCK_SIZE))
+				return 0;
+			break;
+		default:
+			break;
+		}
+		break;
+	case S_DIN_to_DES:
+		if (IS_ALIGNED(size, DES_BLOCK_SIZE))
+			return 0;
+		break;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+static int cc_cipher_init(struct crypto_tfm *tfm)
+{
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct cc_crypto_alg *cc_alg =
+			container_of(tfm->__crt_alg, struct cc_crypto_alg,
+				     skcipher_alg.base);
+	struct device *dev = drvdata_to_dev(cc_alg->drvdata);
+	unsigned int max_key_buf_size = cc_alg->skcipher_alg.max_keysize;
+	int rc = 0;
+
+	dev_dbg(dev, "Initializing context @%p for %s\n", ctx_p,
+		crypto_tfm_alg_name(tfm));
+
+	crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+				    sizeof(struct cipher_req_ctx));
+
+	ctx_p->cipher_mode = cc_alg->cipher_mode;
+	ctx_p->flow_mode = cc_alg->flow_mode;
+	ctx_p->drvdata = cc_alg->drvdata;
+
+	/* Allocate key buffer, cache line aligned */
+	ctx_p->user.key = kmalloc(max_key_buf_size, GFP_KERNEL);
+	if (!ctx_p->user.key)
+		return -ENOMEM;
+
+	dev_dbg(dev, "Allocated key buffer in context. key=@%p\n",
+		ctx_p->user.key);
+
+	/* Map key buffer */
+	ctx_p->user.key_dma_addr = dma_map_single(dev, (void *)ctx_p->user.key,
+						  max_key_buf_size,
+						  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) {
+		dev_err(dev, "Mapping Key %u B at va=%pK for DMA failed\n",
+			max_key_buf_size, ctx_p->user.key);
+		return -ENOMEM;
+	}
+	dev_dbg(dev, "Mapped key %u B at va=%pK to dma=%pad\n",
+		max_key_buf_size, ctx_p->user.key, &ctx_p->user.key_dma_addr);
+
+	if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
+		/* Alloc hash tfm for essiv */
+		ctx_p->shash_tfm = crypto_alloc_shash("sha256-generic", 0, 0);
+		if (IS_ERR(ctx_p->shash_tfm)) {
+			dev_err(dev, "Error allocating hash tfm for ESSIV.\n");
+			return PTR_ERR(ctx_p->shash_tfm);
+		}
+	}
+
+	return rc;
+}
+
+static void cc_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct cc_crypto_alg *cc_alg =
+			container_of(alg, struct cc_crypto_alg,
+				     skcipher_alg.base);
+	unsigned int max_key_buf_size = cc_alg->skcipher_alg.max_keysize;
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+
+	dev_dbg(dev, "Clearing context @%p for %s\n",
+		crypto_tfm_ctx(tfm), crypto_tfm_alg_name(tfm));
+
+	if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
+		/* Free hash tfm for essiv */
+		crypto_free_shash(ctx_p->shash_tfm);
+		ctx_p->shash_tfm = NULL;
+	}
+
+	/* Unmap key buffer */
+	dma_unmap_single(dev, ctx_p->user.key_dma_addr, max_key_buf_size,
+			 DMA_TO_DEVICE);
+	dev_dbg(dev, "Unmapped key buffer key_dma_addr=%pad\n",
+		&ctx_p->user.key_dma_addr);
+
+	/* Free key buffer in context */
+	kzfree(ctx_p->user.key);
+	dev_dbg(dev, "Free key buffer in context. key=@%p\n", ctx_p->user.key);
+}
+
+struct tdes_keys {
+	u8	key1[DES_KEY_SIZE];
+	u8	key2[DES_KEY_SIZE];
+	u8	key3[DES_KEY_SIZE];
+};
+
+static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
+{
+	switch (slot_num) {
+	case 0:
+		return KFDE0_KEY;
+	case 1:
+		return KFDE1_KEY;
+	case 2:
+		return KFDE2_KEY;
+	case 3:
+		return KFDE3_KEY;
+	}
+	return END_OF_KEYS;
+}
+
+static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(sktfm);
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	u32 tmp[DES3_EDE_EXPKEY_WORDS];
+	struct cc_crypto_alg *cc_alg =
+			container_of(tfm->__crt_alg, struct cc_crypto_alg,
+				     skcipher_alg.base);
+	unsigned int max_key_buf_size = cc_alg->skcipher_alg.max_keysize;
+
+	dev_dbg(dev, "Setting key in context @%p for %s. keylen=%u\n",
+		ctx_p, crypto_tfm_alg_name(tfm), keylen);
+	dump_byte_array("key", (u8 *)key, keylen);
+
+	/* STAT_PHASE_0: Init and sanity checks */
+
+	if (validate_keys_sizes(ctx_p, keylen)) {
+		dev_err(dev, "Unsupported key size %d.\n", keylen);
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if (cc_is_hw_key(tfm)) {
+		/* setting HW key slots */
+		struct arm_hw_key_info *hki = (struct arm_hw_key_info *)key;
+
+		if (ctx_p->flow_mode != S_DIN_to_AES) {
+			dev_err(dev, "HW key not supported for non-AES flows\n");
+			return -EINVAL;
+		}
+
+		ctx_p->hw.key1_slot = hw_key_to_cc_hw_key(hki->hw_key1);
+		if (ctx_p->hw.key1_slot == END_OF_KEYS) {
+			dev_err(dev, "Unsupported hw key1 number (%d)\n",
+				hki->hw_key1);
+			return -EINVAL;
+		}
+
+		if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
+		    ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
+		    ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
+			if (hki->hw_key1 == hki->hw_key2) {
+				dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
+					hki->hw_key1, hki->hw_key2);
+				return -EINVAL;
+			}
+			ctx_p->hw.key2_slot =
+				hw_key_to_cc_hw_key(hki->hw_key2);
+			if (ctx_p->hw.key2_slot == END_OF_KEYS) {
+				dev_err(dev, "Unsupported hw key2 number (%d)\n",
+					hki->hw_key2);
+				return -EINVAL;
+			}
+		}
+
+		ctx_p->keylen = keylen;
+		dev_dbg(dev, "cc_is_hw_key ret 0");
+
+		return 0;
+	}
+
+	/*
+	 * Verify DES weak keys
+	 * Note that we're dropping the expanded key since the
+	 * HW does the expansion on its own.
+	 */
+	if (ctx_p->flow_mode == S_DIN_to_DES) {
+		if (keylen == DES3_EDE_KEY_SIZE &&
+		    __des3_ede_setkey(tmp, &tfm->crt_flags, key,
+				      DES3_EDE_KEY_SIZE)) {
+			dev_dbg(dev, "weak 3DES key");
+			return -EINVAL;
+		} else if (!des_ekey(tmp, key) &&
+		    (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_WEAK_KEY)) {
+			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+			dev_dbg(dev, "weak DES key");
+			return -EINVAL;
+		}
+	}
+
+	if (ctx_p->cipher_mode == DRV_CIPHER_XTS &&
+	    xts_check_key(tfm, key, keylen)) {
+		dev_dbg(dev, "weak XTS key");
+		return -EINVAL;
+	}
+
+	/* STAT_PHASE_1: Copy key to ctx */
+	dma_sync_single_for_cpu(dev, ctx_p->user.key_dma_addr,
+				max_key_buf_size, DMA_TO_DEVICE);
+
+	memcpy(ctx_p->user.key, key, keylen);
+	if (keylen == 24)
+		memset(ctx_p->user.key + 24, 0, CC_AES_KEY_SIZE_MAX - 24);
+
+	if (ctx_p->cipher_mode == DRV_CIPHER_ESSIV) {
+		/* sha256 for key2 - use sw implementation */
+		int key_len = keylen >> 1;
+		int err;
+
+		SHASH_DESC_ON_STACK(desc, ctx_p->shash_tfm);
+
+		desc->tfm = ctx_p->shash_tfm;
+
+		err = crypto_shash_digest(desc, ctx_p->user.key, key_len,
+					  ctx_p->user.key + key_len);
+		if (err) {
+			dev_err(dev, "Failed to hash ESSIV key.\n");
+			return err;
+		}
+	}
+	dma_sync_single_for_device(dev, ctx_p->user.key_dma_addr,
+				   max_key_buf_size, DMA_TO_DEVICE);
+	ctx_p->keylen = keylen;
+
+	dev_dbg(dev, "return safely");
+	return 0;
+}
+
+static void cc_setup_cipher_desc(struct crypto_tfm *tfm,
+				 struct cipher_req_ctx *req_ctx,
+				 unsigned int ivsize, unsigned int nbytes,
+				 struct cc_hw_desc desc[],
+				 unsigned int *seq_size)
+{
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	int cipher_mode = ctx_p->cipher_mode;
+	int flow_mode = ctx_p->flow_mode;
+	int direction = req_ctx->gen_ctx.op_type;
+	dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr;
+	unsigned int key_len = ctx_p->keylen;
+	dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr;
+	unsigned int du_size = nbytes;
+
+	struct cc_crypto_alg *cc_alg =
+		container_of(tfm->__crt_alg, struct cc_crypto_alg,
+			     skcipher_alg.base);
+
+	if (cc_alg->data_unit)
+		du_size = cc_alg->data_unit;
+
+	switch (cipher_mode) {
+	case DRV_CIPHER_CBC:
+	case DRV_CIPHER_CBC_CTS:
+	case DRV_CIPHER_CTR:
+	case DRV_CIPHER_OFB:
+		/* Load cipher state */
+		hw_desc_init(&desc[*seq_size]);
+		set_din_type(&desc[*seq_size], DMA_DLLI, iv_dma_addr, ivsize,
+			     NS_BIT);
+		set_cipher_config0(&desc[*seq_size], direction);
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		set_cipher_mode(&desc[*seq_size], cipher_mode);
+		if (cipher_mode == DRV_CIPHER_CTR ||
+		    cipher_mode == DRV_CIPHER_OFB) {
+			set_setup_mode(&desc[*seq_size], SETUP_LOAD_STATE1);
+		} else {
+			set_setup_mode(&desc[*seq_size], SETUP_LOAD_STATE0);
+		}
+		(*seq_size)++;
+		/*FALLTHROUGH*/
+	case DRV_CIPHER_ECB:
+		/* Load key */
+		hw_desc_init(&desc[*seq_size]);
+		set_cipher_mode(&desc[*seq_size], cipher_mode);
+		set_cipher_config0(&desc[*seq_size], direction);
+		if (flow_mode == S_DIN_to_AES) {
+			if (cc_is_hw_key(tfm)) {
+				set_hw_crypto_key(&desc[*seq_size],
+						  ctx_p->hw.key1_slot);
+			} else {
+				set_din_type(&desc[*seq_size], DMA_DLLI,
+					     key_dma_addr, ((key_len == 24) ?
+							    AES_MAX_KEY_SIZE :
+							    key_len), NS_BIT);
+			}
+			set_key_size_aes(&desc[*seq_size], key_len);
+		} else {
+			/*des*/
+			set_din_type(&desc[*seq_size], DMA_DLLI, key_dma_addr,
+				     key_len, NS_BIT);
+			set_key_size_des(&desc[*seq_size], key_len);
+		}
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		set_setup_mode(&desc[*seq_size], SETUP_LOAD_KEY0);
+		(*seq_size)++;
+		break;
+	case DRV_CIPHER_XTS:
+	case DRV_CIPHER_ESSIV:
+	case DRV_CIPHER_BITLOCKER:
+		/* Load AES key */
+		hw_desc_init(&desc[*seq_size]);
+		set_cipher_mode(&desc[*seq_size], cipher_mode);
+		set_cipher_config0(&desc[*seq_size], direction);
+		if (cc_is_hw_key(tfm)) {
+			set_hw_crypto_key(&desc[*seq_size],
+					  ctx_p->hw.key1_slot);
+		} else {
+			set_din_type(&desc[*seq_size], DMA_DLLI, key_dma_addr,
+				     (key_len / 2), NS_BIT);
+		}
+		set_key_size_aes(&desc[*seq_size], (key_len / 2));
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		set_setup_mode(&desc[*seq_size], SETUP_LOAD_KEY0);
+		(*seq_size)++;
+
+		/* load XEX key */
+		hw_desc_init(&desc[*seq_size]);
+		set_cipher_mode(&desc[*seq_size], cipher_mode);
+		set_cipher_config0(&desc[*seq_size], direction);
+		if (cc_is_hw_key(tfm)) {
+			set_hw_crypto_key(&desc[*seq_size],
+					  ctx_p->hw.key2_slot);
+		} else {
+			set_din_type(&desc[*seq_size], DMA_DLLI,
+				     (key_dma_addr + (key_len / 2)),
+				     (key_len / 2), NS_BIT);
+		}
+		set_xex_data_unit_size(&desc[*seq_size], du_size);
+		set_flow_mode(&desc[*seq_size], S_DIN_to_AES2);
+		set_key_size_aes(&desc[*seq_size], (key_len / 2));
+		set_setup_mode(&desc[*seq_size], SETUP_LOAD_XEX_KEY);
+		(*seq_size)++;
+
+		/* Set state */
+		hw_desc_init(&desc[*seq_size]);
+		set_setup_mode(&desc[*seq_size], SETUP_LOAD_STATE1);
+		set_cipher_mode(&desc[*seq_size], cipher_mode);
+		set_cipher_config0(&desc[*seq_size], direction);
+		set_key_size_aes(&desc[*seq_size], (key_len / 2));
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		set_din_type(&desc[*seq_size], DMA_DLLI, iv_dma_addr,
+			     CC_AES_BLOCK_SIZE, NS_BIT);
+		(*seq_size)++;
+		break;
+	default:
+		dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode);
+	}
+}
+
+static void cc_setup_cipher_data(struct crypto_tfm *tfm,
+				 struct cipher_req_ctx *req_ctx,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes,
+				 void *areq, struct cc_hw_desc desc[],
+				 unsigned int *seq_size)
+{
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	unsigned int flow_mode = ctx_p->flow_mode;
+
+	switch (ctx_p->flow_mode) {
+	case S_DIN_to_AES:
+		flow_mode = DIN_AES_DOUT;
+		break;
+	case S_DIN_to_DES:
+		flow_mode = DIN_DES_DOUT;
+		break;
+	default:
+		dev_err(dev, "invalid flow mode, flow_mode = %d\n", flow_mode);
+		return;
+	}
+	/* Process */
+	if (req_ctx->dma_buf_type == CC_DMA_BUF_DLLI) {
+		dev_dbg(dev, " data params addr %pad length 0x%X\n",
+			&sg_dma_address(src), nbytes);
+		dev_dbg(dev, " data params addr %pad length 0x%X\n",
+			&sg_dma_address(dst), nbytes);
+		hw_desc_init(&desc[*seq_size]);
+		set_din_type(&desc[*seq_size], DMA_DLLI, sg_dma_address(src),
+			     nbytes, NS_BIT);
+		set_dout_dlli(&desc[*seq_size], sg_dma_address(dst),
+			      nbytes, NS_BIT, (!areq ? 0 : 1));
+		if (areq)
+			set_queue_last_ind(ctx_p->drvdata, &desc[*seq_size]);
+
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		(*seq_size)++;
+	} else {
+		/* bypass */
+		dev_dbg(dev, " bypass params addr %pad length 0x%X addr 0x%08X\n",
+			&req_ctx->mlli_params.mlli_dma_addr,
+			req_ctx->mlli_params.mlli_len,
+			(unsigned int)ctx_p->drvdata->mlli_sram_addr);
+		hw_desc_init(&desc[*seq_size]);
+		set_din_type(&desc[*seq_size], DMA_DLLI,
+			     req_ctx->mlli_params.mlli_dma_addr,
+			     req_ctx->mlli_params.mlli_len, NS_BIT);
+		set_dout_sram(&desc[*seq_size],
+			      ctx_p->drvdata->mlli_sram_addr,
+			      req_ctx->mlli_params.mlli_len);
+		set_flow_mode(&desc[*seq_size], BYPASS);
+		(*seq_size)++;
+
+		hw_desc_init(&desc[*seq_size]);
+		set_din_type(&desc[*seq_size], DMA_MLLI,
+			     ctx_p->drvdata->mlli_sram_addr,
+			     req_ctx->in_mlli_nents, NS_BIT);
+		if (req_ctx->out_nents == 0) {
+			dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n",
+				(unsigned int)ctx_p->drvdata->mlli_sram_addr,
+				(unsigned int)ctx_p->drvdata->mlli_sram_addr);
+			set_dout_mlli(&desc[*seq_size],
+				      ctx_p->drvdata->mlli_sram_addr,
+				      req_ctx->in_mlli_nents, NS_BIT,
+				      (!areq ? 0 : 1));
+		} else {
+			dev_dbg(dev, " din/dout params addr 0x%08X addr 0x%08X\n",
+				(unsigned int)ctx_p->drvdata->mlli_sram_addr,
+				(unsigned int)ctx_p->drvdata->mlli_sram_addr +
+				(u32)LLI_ENTRY_BYTE_SIZE * req_ctx->in_nents);
+			set_dout_mlli(&desc[*seq_size],
+				      (ctx_p->drvdata->mlli_sram_addr +
+				       (LLI_ENTRY_BYTE_SIZE *
+					req_ctx->in_mlli_nents)),
+				      req_ctx->out_mlli_nents, NS_BIT,
+				      (!areq ? 0 : 1));
+		}
+		if (areq)
+			set_queue_last_ind(ctx_p->drvdata, &desc[*seq_size]);
+
+		set_flow_mode(&desc[*seq_size], flow_mode);
+		(*seq_size)++;
+	}
+}
+
+static void cc_cipher_complete(struct device *dev, void *cc_req, int err)
+{
+	struct skcipher_request *req = (struct skcipher_request *)cc_req;
+	struct scatterlist *dst = req->dst;
+	struct scatterlist *src = req->src;
+	struct cipher_req_ctx *req_ctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+
+	cc_unmap_cipher_request(dev, req_ctx, ivsize, src, dst);
+	kzfree(req_ctx->iv);
+
+	/*
+	 * The crypto API expects us to set the req->iv to the last
+	 * ciphertext block. For encrypt, simply copy from the result.
+	 * For decrypt, we must copy from a saved buffer since this
+	 * could be an in-place decryption operation and the src is
+	 * lost by this point.
+	 */
+	if (req_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT)  {
+		memcpy(req->iv, req_ctx->backup_info, ivsize);
+		kzfree(req_ctx->backup_info);
+	} else if (!err) {
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 (req->cryptlen - ivsize),
+					 ivsize, 0);
+	}
+
+	skcipher_request_complete(req, err);
+}
+
+static int cc_cipher_process(struct skcipher_request *req,
+			     enum drv_crypto_direction direction)
+{
+	struct crypto_skcipher *sk_tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(sk_tfm);
+	struct cipher_req_ctx *req_ctx = skcipher_request_ctx(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(sk_tfm);
+	struct scatterlist *dst = req->dst;
+	struct scatterlist *src = req->src;
+	unsigned int nbytes = req->cryptlen;
+	void *iv = req->iv;
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	struct cc_hw_desc desc[MAX_ABLKCIPHER_SEQ_LEN];
+	struct cc_crypto_req cc_req = {};
+	int rc, cts_restore_flag = 0;
+	unsigned int seq_len = 0;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "%s req=%p iv=%p nbytes=%d\n",
+		((direction == DRV_CRYPTO_DIRECTION_ENCRYPT) ?
+		"Encrypt" : "Decrypt"), req, iv, nbytes);
+
+	/* STAT_PHASE_0: Init and sanity checks */
+
+	/* TODO: check data length according to mode */
+	if (validate_data_size(ctx_p, nbytes)) {
+		dev_err(dev, "Unsupported data size %d.\n", nbytes);
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_BLOCK_LEN);
+		rc = -EINVAL;
+		goto exit_process;
+	}
+	if (nbytes == 0) {
+		/* No data to process is valid */
+		rc = 0;
+		goto exit_process;
+	}
+
+	/* The IV we are handed may be allocted from the stack so
+	 * we must copy it to a DMAable buffer before use.
+	 */
+	req_ctx->iv = kmemdup(iv, ivsize, flags);
+	if (!req_ctx->iv) {
+		rc = -ENOMEM;
+		goto exit_process;
+	}
+
+	/*For CTS in case of data size aligned to 16 use CBC mode*/
+	if (((nbytes % AES_BLOCK_SIZE) == 0) &&
+	    ctx_p->cipher_mode == DRV_CIPHER_CBC_CTS) {
+		ctx_p->cipher_mode = DRV_CIPHER_CBC;
+		cts_restore_flag = 1;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_cipher_complete;
+	cc_req.user_arg = (void *)req;
+
+#ifdef ENABLE_CYCLE_COUNT
+	cc_req.op_type = (direction == DRV_CRYPTO_DIRECTION_DECRYPT) ?
+		STAT_OP_TYPE_DECODE : STAT_OP_TYPE_ENCODE;
+
+#endif
+
+	/* Setup request context */
+	req_ctx->gen_ctx.op_type = direction;
+
+	/* STAT_PHASE_1: Map buffers */
+
+	rc = cc_map_cipher_request(ctx_p->drvdata, req_ctx, ivsize, nbytes,
+				      req_ctx->iv, src, dst, flags);
+	if (rc) {
+		dev_err(dev, "map_request() failed\n");
+		goto exit_process;
+	}
+
+	/* STAT_PHASE_2: Create sequence */
+
+	/* Setup processing */
+	cc_setup_cipher_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len);
+	/* Data processing */
+	cc_setup_cipher_data(tfm, req_ctx, dst, src, nbytes, req, desc,
+			     &seq_len);
+
+	/* do we need to generate IV? */
+	if (req_ctx->is_giv) {
+		cc_req.ivgen_dma_addr[0] = req_ctx->gen_ctx.iv_dma_addr;
+		cc_req.ivgen_dma_addr_len = 1;
+		/* set the IV size (8/16 B long)*/
+		cc_req.ivgen_size = ivsize;
+	}
+
+	/* STAT_PHASE_3: Lock HW and push sequence */
+
+	rc = cc_send_request(ctx_p->drvdata, &cc_req, desc, seq_len,
+			     &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		/* Failed to send the request or request completed
+		 * synchronously
+		 */
+		cc_unmap_cipher_request(dev, req_ctx, ivsize, src, dst);
+	}
+
+exit_process:
+	if (cts_restore_flag)
+		ctx_p->cipher_mode = DRV_CIPHER_CBC_CTS;
+
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		kzfree(req_ctx->backup_info);
+		kzfree(req_ctx->iv);
+	}
+
+	return rc;
+}
+
+static int cc_cipher_encrypt(struct skcipher_request *req)
+{
+	struct cipher_req_ctx *req_ctx = skcipher_request_ctx(req);
+
+	req_ctx->is_giv = false;
+	req_ctx->backup_info = NULL;
+
+	return cc_cipher_process(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
+}
+
+static int cc_cipher_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *sk_tfm = crypto_skcipher_reqtfm(req);
+	struct cipher_req_ctx *req_ctx = skcipher_request_ctx(req);
+	unsigned int ivsize = crypto_skcipher_ivsize(sk_tfm);
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	/*
+	 * Allocate and save the last IV sized bytes of the source, which will
+	 * be lost in case of in-place decryption and might be needed for CTS.
+	 */
+	req_ctx->backup_info = kmalloc(ivsize, flags);
+	if (!req_ctx->backup_info)
+		return -ENOMEM;
+
+	scatterwalk_map_and_copy(req_ctx->backup_info, req->src,
+				 (req->cryptlen - ivsize), ivsize, 0);
+	req_ctx->is_giv = false;
+
+	return cc_cipher_process(req, DRV_CRYPTO_DIRECTION_DECRYPT);
+}
+
+/* Block cipher alg */
+static const struct cc_alg_template skcipher_algs[] = {
+	{
+		.name = "xts(aes)",
+		.driver_name = "xts-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "xts512(aes)",
+		.driver_name = "xts-aes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "xts4096(aes)",
+		.driver_name = "xts-aes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv(aes)",
+		.driver_name = "essiv-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv512(aes)",
+		.driver_name = "essiv-aes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv4096(aes)",
+		.driver_name = "essiv-aes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker(aes)",
+		.driver_name = "bitlocker-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker512(aes)",
+		.driver_name = "bitlocker-aes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker4096(aes)",
+		.driver_name = "bitlocker-aes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE * 2,
+			.max_keysize = AES_MAX_KEY_SIZE * 2,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "ecb(aes)",
+		.driver_name = "ecb-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = 0,
+			},
+		.cipher_mode = DRV_CIPHER_ECB,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "cbc(aes)",
+		.driver_name = "cbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "ofb(aes)",
+		.driver_name = "ofb-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_OFB,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "cts1(cbc(aes))",
+		.driver_name = "cts1-cbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CBC_CTS,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "ctr(aes)",
+		.driver_name = "ctr-aes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CTR,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "cbc(des3_ede)",
+		.driver_name = "cbc-3des-ccree",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES3_EDE_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_DES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "ecb(des3_ede)",
+		.driver_name = "ecb-3des-ccree",
+		.blocksize = DES3_EDE_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = 0,
+			},
+		.cipher_mode = DRV_CIPHER_ECB,
+		.flow_mode = S_DIN_to_DES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "cbc(des)",
+		.driver_name = "cbc-des-ccree",
+		.blocksize = DES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_DES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "ecb(des)",
+		.driver_name = "ecb-des-ccree",
+		.blocksize = DES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_setkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize = 0,
+			},
+		.cipher_mode = DRV_CIPHER_ECB,
+		.flow_mode = S_DIN_to_DES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+};
+
+static struct cc_crypto_alg *cc_create_alg(const struct cc_alg_template *tmpl,
+					   struct device *dev)
+{
+	struct cc_crypto_alg *t_alg;
+	struct skcipher_alg *alg;
+
+	t_alg = kzalloc(sizeof(*t_alg), GFP_KERNEL);
+	if (!t_alg)
+		return ERR_PTR(-ENOMEM);
+
+	alg = &t_alg->skcipher_alg;
+
+	memcpy(alg, &tmpl->template_skcipher, sizeof(*alg));
+
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 tmpl->driver_name);
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CC_CRA_PRIO;
+	alg->base.cra_blocksize = tmpl->blocksize;
+	alg->base.cra_alignmask = 0;
+	alg->base.cra_ctxsize = sizeof(struct cc_cipher_ctx);
+
+	alg->base.cra_init = cc_cipher_init;
+	alg->base.cra_exit = cc_cipher_exit;
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY |
+				CRYPTO_ALG_TYPE_SKCIPHER;
+
+	t_alg->cipher_mode = tmpl->cipher_mode;
+	t_alg->flow_mode = tmpl->flow_mode;
+	t_alg->data_unit = tmpl->data_unit;
+
+	return t_alg;
+}
+
+int cc_cipher_free(struct cc_drvdata *drvdata)
+{
+	struct cc_crypto_alg *t_alg, *n;
+	struct cc_cipher_handle *cipher_handle = drvdata->cipher_handle;
+
+	if (cipher_handle) {
+		/* Remove registered algs */
+		list_for_each_entry_safe(t_alg, n, &cipher_handle->alg_list,
+					 entry) {
+			crypto_unregister_skcipher(&t_alg->skcipher_alg);
+			list_del(&t_alg->entry);
+			kfree(t_alg);
+		}
+		kfree(cipher_handle);
+		drvdata->cipher_handle = NULL;
+	}
+	return 0;
+}
+
+int cc_cipher_alloc(struct cc_drvdata *drvdata)
+{
+	struct cc_cipher_handle *cipher_handle;
+	struct cc_crypto_alg *t_alg;
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc = -ENOMEM;
+	int alg;
+
+	cipher_handle = kmalloc(sizeof(*cipher_handle), GFP_KERNEL);
+	if (!cipher_handle)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&cipher_handle->alg_list);
+	drvdata->cipher_handle = cipher_handle;
+
+	/* Linux crypto */
+	dev_dbg(dev, "Number of algorithms = %zu\n",
+		ARRAY_SIZE(skcipher_algs));
+	for (alg = 0; alg < ARRAY_SIZE(skcipher_algs); alg++) {
+		if (skcipher_algs[alg].min_hw_rev > drvdata->hw_rev)
+			continue;
+
+		dev_dbg(dev, "creating %s\n", skcipher_algs[alg].driver_name);
+		t_alg = cc_create_alg(&skcipher_algs[alg], dev);
+		if (IS_ERR(t_alg)) {
+			rc = PTR_ERR(t_alg);
+			dev_err(dev, "%s alg allocation failed\n",
+				skcipher_algs[alg].driver_name);
+			goto fail0;
+		}
+		t_alg->drvdata = drvdata;
+
+		dev_dbg(dev, "registering %s\n",
+			skcipher_algs[alg].driver_name);
+		rc = crypto_register_skcipher(&t_alg->skcipher_alg);
+		dev_dbg(dev, "%s alg registration rc = %x\n",
+			t_alg->skcipher_alg.base.cra_driver_name, rc);
+		if (rc) {
+			dev_err(dev, "%s alg registration failed\n",
+				t_alg->skcipher_alg.base.cra_driver_name);
+			kfree(t_alg);
+			goto fail0;
+		} else {
+			list_add_tail(&t_alg->entry,
+				      &cipher_handle->alg_list);
+			dev_dbg(dev, "Registered %s\n",
+				t_alg->skcipher_alg.base.cra_driver_name);
+		}
+	}
+	return 0;
+
+fail0:
+	cc_cipher_free(drvdata);
+	return rc;
+}
diff --git a/drivers/crypto/ccree/cc_cipher.h b/drivers/crypto/ccree/cc_cipher.h
new file mode 100644
index 000000000000..2a2a6f46c515
--- /dev/null
+++ b/drivers/crypto/ccree/cc_cipher.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_cipher.h
+ * ARM CryptoCell Cipher Crypto API
+ */
+
+#ifndef __CC_CIPHER_H__
+#define __CC_CIPHER_H__
+
+#include <linux/kernel.h>
+#include <crypto/algapi.h>
+#include "cc_driver.h"
+#include "cc_buffer_mgr.h"
+
+/* Crypto cipher flags */
+#define CC_CRYPTO_CIPHER_KEY_KFDE0	BIT(0)
+#define CC_CRYPTO_CIPHER_KEY_KFDE1	BIT(1)
+#define CC_CRYPTO_CIPHER_KEY_KFDE2	BIT(2)
+#define CC_CRYPTO_CIPHER_KEY_KFDE3	BIT(3)
+#define CC_CRYPTO_CIPHER_DU_SIZE_512B	BIT(4)
+
+#define CC_CRYPTO_CIPHER_KEY_KFDE_MASK (CC_CRYPTO_CIPHER_KEY_KFDE0 | \
+					CC_CRYPTO_CIPHER_KEY_KFDE1 | \
+					CC_CRYPTO_CIPHER_KEY_KFDE2 | \
+					CC_CRYPTO_CIPHER_KEY_KFDE3)
+
+struct cipher_req_ctx {
+	struct async_gen_req_ctx gen_ctx;
+	enum cc_req_dma_buf_type dma_buf_type;
+	u32 in_nents;
+	u32 in_mlli_nents;
+	u32 out_nents;
+	u32 out_mlli_nents;
+	u8 *backup_info; /*store iv for generated IV flow*/
+	u8 *iv;
+	bool is_giv;
+	struct mlli_params mlli_params;
+};
+
+int cc_cipher_alloc(struct cc_drvdata *drvdata);
+
+int cc_cipher_free(struct cc_drvdata *drvdata);
+
+struct arm_hw_key_info {
+	int hw_key1;
+	int hw_key2;
+};
+
+/*
+ * This is a stub function that will replaced when we
+ * implement secure keys
+ */
+static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
+{
+	return false;
+}
+
+#endif /*__CC_CIPHER_H__*/
diff --git a/drivers/crypto/ccree/cc_crypto_ctx.h b/drivers/crypto/ccree/cc_crypto_ctx.h
new file mode 100644
index 000000000000..e032544f4e31
--- /dev/null
+++ b/drivers/crypto/ccree/cc_crypto_ctx.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef _CC_CRYPTO_CTX_H_
+#define _CC_CRYPTO_CTX_H_
+
+#include <linux/types.h>
+
+#define CC_DRV_DES_IV_SIZE 8
+#define CC_DRV_DES_BLOCK_SIZE 8
+
+#define CC_DRV_DES_ONE_KEY_SIZE 8
+#define CC_DRV_DES_DOUBLE_KEY_SIZE 16
+#define CC_DRV_DES_TRIPLE_KEY_SIZE 24
+#define CC_DRV_DES_KEY_SIZE_MAX CC_DRV_DES_TRIPLE_KEY_SIZE
+
+#define CC_AES_IV_SIZE 16
+#define CC_AES_IV_SIZE_WORDS (CC_AES_IV_SIZE >> 2)
+
+#define CC_AES_BLOCK_SIZE 16
+#define CC_AES_BLOCK_SIZE_WORDS 4
+
+#define CC_AES_128_BIT_KEY_SIZE 16
+#define CC_AES_128_BIT_KEY_SIZE_WORDS	(CC_AES_128_BIT_KEY_SIZE >> 2)
+#define CC_AES_192_BIT_KEY_SIZE 24
+#define CC_AES_192_BIT_KEY_SIZE_WORDS	(CC_AES_192_BIT_KEY_SIZE >> 2)
+#define CC_AES_256_BIT_KEY_SIZE 32
+#define CC_AES_256_BIT_KEY_SIZE_WORDS	(CC_AES_256_BIT_KEY_SIZE >> 2)
+#define CC_AES_KEY_SIZE_MAX			CC_AES_256_BIT_KEY_SIZE
+#define CC_AES_KEY_SIZE_WORDS_MAX		(CC_AES_KEY_SIZE_MAX >> 2)
+
+#define CC_MD5_DIGEST_SIZE	16
+#define CC_SHA1_DIGEST_SIZE	20
+#define CC_SHA224_DIGEST_SIZE	28
+#define CC_SHA256_DIGEST_SIZE	32
+#define CC_SHA256_DIGEST_SIZE_IN_WORDS 8
+#define CC_SHA384_DIGEST_SIZE	48
+#define CC_SHA512_DIGEST_SIZE	64
+
+#define CC_SHA1_BLOCK_SIZE 64
+#define CC_SHA1_BLOCK_SIZE_IN_WORDS 16
+#define CC_MD5_BLOCK_SIZE 64
+#define CC_MD5_BLOCK_SIZE_IN_WORDS 16
+#define CC_SHA224_BLOCK_SIZE 64
+#define CC_SHA256_BLOCK_SIZE 64
+#define CC_SHA256_BLOCK_SIZE_IN_WORDS 16
+#define CC_SHA1_224_256_BLOCK_SIZE 64
+#define CC_SHA384_BLOCK_SIZE 128
+#define CC_SHA512_BLOCK_SIZE 128
+
+#define CC_DIGEST_SIZE_MAX CC_SHA512_DIGEST_SIZE
+#define CC_HASH_BLOCK_SIZE_MAX CC_SHA512_BLOCK_SIZE /*1024b*/
+
+#define CC_HMAC_BLOCK_SIZE_MAX CC_HASH_BLOCK_SIZE_MAX
+
+#define CC_DRV_ALG_MAX_BLOCK_SIZE CC_HASH_BLOCK_SIZE_MAX
+
+enum drv_engine_type {
+	DRV_ENGINE_NULL = 0,
+	DRV_ENGINE_AES = 1,
+	DRV_ENGINE_DES = 2,
+	DRV_ENGINE_HASH = 3,
+	DRV_ENGINE_RC4 = 4,
+	DRV_ENGINE_DOUT = 5,
+	DRV_ENGINE_RESERVE32B = S32_MAX,
+};
+
+enum drv_crypto_alg {
+	DRV_CRYPTO_ALG_NULL = -1,
+	DRV_CRYPTO_ALG_AES  = 0,
+	DRV_CRYPTO_ALG_DES  = 1,
+	DRV_CRYPTO_ALG_HASH = 2,
+	DRV_CRYPTO_ALG_C2   = 3,
+	DRV_CRYPTO_ALG_HMAC = 4,
+	DRV_CRYPTO_ALG_AEAD = 5,
+	DRV_CRYPTO_ALG_BYPASS = 6,
+	DRV_CRYPTO_ALG_NUM = 7,
+	DRV_CRYPTO_ALG_RESERVE32B = S32_MAX
+};
+
+enum drv_crypto_direction {
+	DRV_CRYPTO_DIRECTION_NULL = -1,
+	DRV_CRYPTO_DIRECTION_ENCRYPT = 0,
+	DRV_CRYPTO_DIRECTION_DECRYPT = 1,
+	DRV_CRYPTO_DIRECTION_DECRYPT_ENCRYPT = 3,
+	DRV_CRYPTO_DIRECTION_RESERVE32B = S32_MAX
+};
+
+enum drv_cipher_mode {
+	DRV_CIPHER_NULL_MODE = -1,
+	DRV_CIPHER_ECB = 0,
+	DRV_CIPHER_CBC = 1,
+	DRV_CIPHER_CTR = 2,
+	DRV_CIPHER_CBC_MAC = 3,
+	DRV_CIPHER_XTS = 4,
+	DRV_CIPHER_XCBC_MAC = 5,
+	DRV_CIPHER_OFB = 6,
+	DRV_CIPHER_CMAC = 7,
+	DRV_CIPHER_CCM = 8,
+	DRV_CIPHER_CBC_CTS = 11,
+	DRV_CIPHER_GCTR = 12,
+	DRV_CIPHER_ESSIV = 13,
+	DRV_CIPHER_BITLOCKER = 14,
+	DRV_CIPHER_RESERVE32B = S32_MAX
+};
+
+enum drv_hash_mode {
+	DRV_HASH_NULL = -1,
+	DRV_HASH_SHA1 = 0,
+	DRV_HASH_SHA256 = 1,
+	DRV_HASH_SHA224 = 2,
+	DRV_HASH_SHA512 = 3,
+	DRV_HASH_SHA384 = 4,
+	DRV_HASH_MD5 = 5,
+	DRV_HASH_CBC_MAC = 6,
+	DRV_HASH_XCBC_MAC = 7,
+	DRV_HASH_CMAC = 8,
+	DRV_HASH_MODE_NUM = 9,
+	DRV_HASH_RESERVE32B = S32_MAX
+};
+
+enum drv_hash_hw_mode {
+	DRV_HASH_HW_MD5 = 0,
+	DRV_HASH_HW_SHA1 = 1,
+	DRV_HASH_HW_SHA256 = 2,
+	DRV_HASH_HW_SHA224 = 10,
+	DRV_HASH_HW_SHA512 = 4,
+	DRV_HASH_HW_SHA384 = 12,
+	DRV_HASH_HW_GHASH = 6,
+	DRV_HASH_HW_RESERVE32B = S32_MAX
+};
+
+#endif /* _CC_CRYPTO_CTX_H_ */
diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
new file mode 100644
index 000000000000..08f8db489cf0
--- /dev/null
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/stringify.h>
+#include "cc_driver.h"
+#include "cc_crypto_ctx.h"
+#include "cc_debugfs.h"
+
+struct cc_debugfs_ctx {
+	struct dentry *dir;
+};
+
+#define CC_DEBUG_REG(_X) {	\
+	.name = __stringify(_X),\
+	.offset = CC_REG(_X)	\
+	}
+
+/*
+ * This is a global var for the dentry of the
+ * debugfs ccree/ dir. It is not tied down to
+ * a specific instance of ccree, hence it is
+ * global.
+ */
+static struct dentry *cc_debugfs_dir;
+
+static struct debugfs_reg32 debug_regs[] = {
+	CC_DEBUG_REG(HOST_SIGNATURE),
+	CC_DEBUG_REG(HOST_IRR),
+	CC_DEBUG_REG(HOST_POWER_DOWN_EN),
+	CC_DEBUG_REG(AXIM_MON_ERR),
+	CC_DEBUG_REG(DSCRPTR_QUEUE_CONTENT),
+	CC_DEBUG_REG(HOST_IMR),
+	CC_DEBUG_REG(AXIM_CFG),
+	CC_DEBUG_REG(AXIM_CACHE_PARAMS),
+	CC_DEBUG_REG(HOST_VERSION),
+	CC_DEBUG_REG(GPR_HOST),
+	CC_DEBUG_REG(AXIM_MON_COMP),
+};
+
+int __init cc_debugfs_global_init(void)
+{
+	cc_debugfs_dir = debugfs_create_dir("ccree", NULL);
+
+	return !cc_debugfs_dir;
+}
+
+void __exit cc_debugfs_global_fini(void)
+{
+	debugfs_remove(cc_debugfs_dir);
+}
+
+int cc_debugfs_init(struct cc_drvdata *drvdata)
+{
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct cc_debugfs_ctx *ctx;
+	struct debugfs_regset32 *regset;
+	struct dentry *file;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+	if (!regset)
+		return -ENOMEM;
+
+	regset->regs = debug_regs;
+	regset->nregs = ARRAY_SIZE(debug_regs);
+	regset->base = drvdata->cc_base;
+
+	ctx->dir = debugfs_create_dir(drvdata->plat_dev->name, cc_debugfs_dir);
+	if (!ctx->dir)
+		return -ENFILE;
+
+	file = debugfs_create_regset32("regs", 0400, ctx->dir, regset);
+	if (!file) {
+		debugfs_remove(ctx->dir);
+		return -ENFILE;
+	}
+
+	file = debugfs_create_bool("coherent", 0400, ctx->dir,
+				   &drvdata->coherent);
+
+	if (!file) {
+		debugfs_remove_recursive(ctx->dir);
+		return -ENFILE;
+	}
+
+	drvdata->debugfs = ctx;
+
+	return 0;
+}
+
+void cc_debugfs_fini(struct cc_drvdata *drvdata)
+{
+	struct cc_debugfs_ctx *ctx = (struct cc_debugfs_ctx *)drvdata->debugfs;
+
+	debugfs_remove_recursive(ctx->dir);
+}
diff --git a/drivers/crypto/ccree/cc_debugfs.h b/drivers/crypto/ccree/cc_debugfs.h
new file mode 100644
index 000000000000..5b5320eca7d2
--- /dev/null
+++ b/drivers/crypto/ccree/cc_debugfs.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_DEBUGFS_H__
+#define __CC_DEBUGFS_H__
+
+#ifdef CONFIG_DEBUG_FS
+int cc_debugfs_global_init(void);
+void cc_debugfs_global_fini(void);
+
+int cc_debugfs_init(struct cc_drvdata *drvdata);
+void cc_debugfs_fini(struct cc_drvdata *drvdata);
+
+#else
+
+static inline int cc_debugfs_global_init(void)
+{
+	return 0;
+}
+
+static inline void cc_debugfs_global_fini(void) {}
+
+static inline int cc_debugfs_init(struct cc_drvdata *drvdata)
+{
+	return 0;
+}
+
+static inline void cc_debugfs_fini(struct cc_drvdata *drvdata) {}
+
+#endif
+
+#endif /*__CC_SYSFS_H__*/
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
new file mode 100644
index 000000000000..89ce013ae093
--- /dev/null
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/crypto.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+
+#include "cc_driver.h"
+#include "cc_request_mgr.h"
+#include "cc_buffer_mgr.h"
+#include "cc_debugfs.h"
+#include "cc_cipher.h"
+#include "cc_aead.h"
+#include "cc_hash.h"
+#include "cc_ivgen.h"
+#include "cc_sram_mgr.h"
+#include "cc_pm.h"
+#include "cc_fips.h"
+
+bool cc_dump_desc;
+module_param_named(dump_desc, cc_dump_desc, bool, 0600);
+MODULE_PARM_DESC(cc_dump_desc, "Dump descriptors to kernel log as debugging aid");
+
+bool cc_dump_bytes;
+module_param_named(dump_bytes, cc_dump_bytes, bool, 0600);
+MODULE_PARM_DESC(cc_dump_bytes, "Dump buffers to kernel log as debugging aid");
+
+struct cc_hw_data {
+	char *name;
+	enum cc_hw_rev rev;
+	u32 sig;
+};
+
+/* Hardware revisions defs. */
+
+static const struct cc_hw_data cc712_hw = {
+	.name = "712", .rev = CC_HW_REV_712, .sig =  0xDCC71200U
+};
+
+static const struct cc_hw_data cc710_hw = {
+	.name = "710", .rev = CC_HW_REV_710, .sig =  0xDCC63200U
+};
+
+static const struct cc_hw_data cc630p_hw = {
+	.name = "630P", .rev = CC_HW_REV_630, .sig = 0xDCC63000U
+};
+
+static const struct of_device_id arm_ccree_dev_of_match[] = {
+	{ .compatible = "arm,cryptocell-712-ree", .data = &cc712_hw },
+	{ .compatible = "arm,cryptocell-710-ree", .data = &cc710_hw },
+	{ .compatible = "arm,cryptocell-630p-ree", .data = &cc630p_hw },
+	{}
+};
+MODULE_DEVICE_TABLE(of, arm_ccree_dev_of_match);
+
+void __dump_byte_array(const char *name, const u8 *buf, size_t len)
+{
+	char prefix[64];
+
+	if (!buf)
+		return;
+
+	snprintf(prefix, sizeof(prefix), "%s[%zu]: ", name, len);
+
+	print_hex_dump(KERN_DEBUG, prefix, DUMP_PREFIX_ADDRESS, 16, 1, buf,
+		       len, false);
+}
+
+static irqreturn_t cc_isr(int irq, void *dev_id)
+{
+	struct cc_drvdata *drvdata = (struct cc_drvdata *)dev_id;
+	struct device *dev = drvdata_to_dev(drvdata);
+	u32 irr;
+	u32 imr;
+
+	/* STAT_OP_TYPE_GENERIC STAT_PHASE_0: Interrupt */
+
+	/* read the interrupt status */
+	irr = cc_ioread(drvdata, CC_REG(HOST_IRR));
+	dev_dbg(dev, "Got IRR=0x%08X\n", irr);
+	if (irr == 0) { /* Probably shared interrupt line */
+		dev_err(dev, "Got interrupt with empty IRR\n");
+		return IRQ_NONE;
+	}
+	imr = cc_ioread(drvdata, CC_REG(HOST_IMR));
+
+	/* clear interrupt - must be before processing events */
+	cc_iowrite(drvdata, CC_REG(HOST_ICR), irr);
+
+	drvdata->irq = irr;
+	/* Completion interrupt - most probable */
+	if (irr & CC_COMP_IRQ_MASK) {
+		/* Mask AXI completion interrupt - will be unmasked in
+		 * Deferred service handler
+		 */
+		cc_iowrite(drvdata, CC_REG(HOST_IMR), imr | CC_COMP_IRQ_MASK);
+		irr &= ~CC_COMP_IRQ_MASK;
+		complete_request(drvdata);
+	}
+#ifdef CONFIG_CRYPTO_FIPS
+	/* TEE FIPS interrupt */
+	if (irr & CC_GPR0_IRQ_MASK) {
+		/* Mask interrupt - will be unmasked in Deferred service
+		 * handler
+		 */
+		cc_iowrite(drvdata, CC_REG(HOST_IMR), imr | CC_GPR0_IRQ_MASK);
+		irr &= ~CC_GPR0_IRQ_MASK;
+		fips_handler(drvdata);
+	}
+#endif
+	/* AXI error interrupt */
+	if (irr & CC_AXI_ERR_IRQ_MASK) {
+		u32 axi_err;
+
+		/* Read the AXI error ID */
+		axi_err = cc_ioread(drvdata, CC_REG(AXIM_MON_ERR));
+		dev_dbg(dev, "AXI completion error: axim_mon_err=0x%08X\n",
+			axi_err);
+
+		irr &= ~CC_AXI_ERR_IRQ_MASK;
+	}
+
+	if (irr) {
+		dev_dbg(dev, "IRR includes unknown cause bits (0x%08X)\n",
+			irr);
+		/* Just warning */
+	}
+
+	return IRQ_HANDLED;
+}
+
+int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe)
+{
+	unsigned int val, cache_params;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	/* Unmask all AXI interrupt sources AXI_CFG1 register */
+	val = cc_ioread(drvdata, CC_REG(AXIM_CFG));
+	cc_iowrite(drvdata, CC_REG(AXIM_CFG), val & ~CC_AXI_IRQ_MASK);
+	dev_dbg(dev, "AXIM_CFG=0x%08X\n",
+		cc_ioread(drvdata, CC_REG(AXIM_CFG)));
+
+	/* Clear all pending interrupts */
+	val = cc_ioread(drvdata, CC_REG(HOST_IRR));
+	dev_dbg(dev, "IRR=0x%08X\n", val);
+	cc_iowrite(drvdata, CC_REG(HOST_ICR), val);
+
+	/* Unmask relevant interrupt cause */
+	val = CC_COMP_IRQ_MASK | CC_AXI_ERR_IRQ_MASK;
+
+	if (drvdata->hw_rev >= CC_HW_REV_712)
+		val |= CC_GPR0_IRQ_MASK;
+
+	cc_iowrite(drvdata, CC_REG(HOST_IMR), ~val);
+
+	cache_params = (drvdata->coherent ? CC_COHERENT_CACHE_PARAMS : 0x0);
+
+	val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS));
+
+	if (is_probe)
+		dev_info(dev, "Cache params previous: 0x%08X\n", val);
+
+	cc_iowrite(drvdata, CC_REG(AXIM_CACHE_PARAMS), cache_params);
+	val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS));
+
+	if (is_probe)
+		dev_info(dev, "Cache params current: 0x%08X (expect: 0x%08X)\n",
+			 val, cache_params);
+
+	return 0;
+}
+
+static int init_cc_resources(struct platform_device *plat_dev)
+{
+	struct resource *req_mem_cc_regs = NULL;
+	struct cc_drvdata *new_drvdata;
+	struct device *dev = &plat_dev->dev;
+	struct device_node *np = dev->of_node;
+	u32 signature_val;
+	u64 dma_mask;
+	const struct cc_hw_data *hw_rev;
+	const struct of_device_id *dev_id;
+	int rc = 0;
+
+	new_drvdata = devm_kzalloc(dev, sizeof(*new_drvdata), GFP_KERNEL);
+	if (!new_drvdata)
+		return -ENOMEM;
+
+	dev_id = of_match_node(arm_ccree_dev_of_match, np);
+	if (!dev_id)
+		return -ENODEV;
+
+	hw_rev = (struct cc_hw_data *)dev_id->data;
+	new_drvdata->hw_rev_name = hw_rev->name;
+	new_drvdata->hw_rev = hw_rev->rev;
+
+	if (hw_rev->rev >= CC_HW_REV_712) {
+		new_drvdata->hash_len_sz = HASH_LEN_SIZE_712;
+		new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP);
+	} else {
+		new_drvdata->hash_len_sz = HASH_LEN_SIZE_630;
+		new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP8);
+	}
+
+	platform_set_drvdata(plat_dev, new_drvdata);
+	new_drvdata->plat_dev = plat_dev;
+
+	new_drvdata->clk = of_clk_get(np, 0);
+	new_drvdata->coherent = of_dma_is_coherent(np);
+
+	/* Get device resources */
+	/* First CC registers space */
+	req_mem_cc_regs = platform_get_resource(plat_dev, IORESOURCE_MEM, 0);
+	/* Map registers space */
+	new_drvdata->cc_base = devm_ioremap_resource(dev, req_mem_cc_regs);
+	if (IS_ERR(new_drvdata->cc_base)) {
+		dev_err(dev, "Failed to ioremap registers");
+		return PTR_ERR(new_drvdata->cc_base);
+	}
+
+	dev_dbg(dev, "Got MEM resource (%s): %pR\n", req_mem_cc_regs->name,
+		req_mem_cc_regs);
+	dev_dbg(dev, "CC registers mapped from %pa to 0x%p\n",
+		&req_mem_cc_regs->start, new_drvdata->cc_base);
+
+	/* Then IRQ */
+	new_drvdata->irq = platform_get_irq(plat_dev, 0);
+	if (new_drvdata->irq < 0) {
+		dev_err(dev, "Failed getting IRQ resource\n");
+		return new_drvdata->irq;
+	}
+
+	rc = devm_request_irq(dev, new_drvdata->irq, cc_isr,
+			      IRQF_SHARED, "ccree", new_drvdata);
+	if (rc) {
+		dev_err(dev, "Could not register to interrupt %d\n",
+			new_drvdata->irq);
+		return rc;
+	}
+	dev_dbg(dev, "Registered to IRQ: %d\n", new_drvdata->irq);
+
+	init_completion(&new_drvdata->hw_queue_avail);
+
+	if (!plat_dev->dev.dma_mask)
+		plat_dev->dev.dma_mask = &plat_dev->dev.coherent_dma_mask;
+
+	dma_mask = DMA_BIT_MASK(DMA_BIT_MASK_LEN);
+	while (dma_mask > 0x7fffffffUL) {
+		if (dma_supported(&plat_dev->dev, dma_mask)) {
+			rc = dma_set_coherent_mask(&plat_dev->dev, dma_mask);
+			if (!rc)
+				break;
+		}
+		dma_mask >>= 1;
+	}
+
+	if (rc) {
+		dev_err(dev, "Failed in dma_set_mask, mask=%pad\n", &dma_mask);
+		return rc;
+	}
+
+	rc = cc_clk_on(new_drvdata);
+	if (rc) {
+		dev_err(dev, "Failed to enable clock");
+		return rc;
+	}
+
+	/* Verify correct mapping */
+	signature_val = cc_ioread(new_drvdata, CC_REG(HOST_SIGNATURE));
+	if (signature_val != hw_rev->sig) {
+		dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n",
+			signature_val, hw_rev->sig);
+		rc = -EINVAL;
+		goto post_clk_err;
+	}
+	dev_dbg(dev, "CC SIGNATURE=0x%08X\n", signature_val);
+
+	/* Display HW versions */
+	dev_info(dev, "ARM CryptoCell %s Driver: HW version 0x%08X, Driver version %s\n",
+		 hw_rev->name, cc_ioread(new_drvdata, CC_REG(HOST_VERSION)),
+		 DRV_MODULE_VERSION);
+
+	rc = init_cc_regs(new_drvdata, true);
+	if (rc) {
+		dev_err(dev, "init_cc_regs failed\n");
+		goto post_clk_err;
+	}
+
+	rc = cc_debugfs_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "Failed registering debugfs interface\n");
+		goto post_regs_err;
+	}
+
+	rc = cc_fips_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "CC_FIPS_INIT failed 0x%x\n", rc);
+		goto post_debugfs_err;
+	}
+	rc = cc_sram_mgr_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_sram_mgr_init failed\n");
+		goto post_fips_init_err;
+	}
+
+	new_drvdata->mlli_sram_addr =
+		cc_sram_alloc(new_drvdata, MAX_MLLI_BUFF_SIZE);
+	if (new_drvdata->mlli_sram_addr == NULL_SRAM_ADDR) {
+		dev_err(dev, "Failed to alloc MLLI Sram buffer\n");
+		rc = -ENOMEM;
+		goto post_sram_mgr_err;
+	}
+
+	rc = cc_req_mgr_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_req_mgr_init failed\n");
+		goto post_sram_mgr_err;
+	}
+
+	rc = cc_buffer_mgr_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "buffer_mgr_init failed\n");
+		goto post_req_mgr_err;
+	}
+
+	rc = cc_pm_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "ssi_power_mgr_init failed\n");
+		goto post_buf_mgr_err;
+	}
+
+	rc = cc_ivgen_init(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_ivgen_init failed\n");
+		goto post_power_mgr_err;
+	}
+
+	/* Allocate crypto algs */
+	rc = cc_cipher_alloc(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_cipher_alloc failed\n");
+		goto post_ivgen_err;
+	}
+
+	/* hash must be allocated before aead since hash exports APIs */
+	rc = cc_hash_alloc(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_hash_alloc failed\n");
+		goto post_cipher_err;
+	}
+
+	rc = cc_aead_alloc(new_drvdata);
+	if (rc) {
+		dev_err(dev, "cc_aead_alloc failed\n");
+		goto post_hash_err;
+	}
+
+	/* If we got here and FIPS mode is enabled
+	 * it means all FIPS test passed, so let TEE
+	 * know we're good.
+	 */
+	cc_set_ree_fips_status(new_drvdata, true);
+
+	return 0;
+
+post_hash_err:
+	cc_hash_free(new_drvdata);
+post_cipher_err:
+	cc_cipher_free(new_drvdata);
+post_ivgen_err:
+	cc_ivgen_fini(new_drvdata);
+post_power_mgr_err:
+	cc_pm_fini(new_drvdata);
+post_buf_mgr_err:
+	 cc_buffer_mgr_fini(new_drvdata);
+post_req_mgr_err:
+	cc_req_mgr_fini(new_drvdata);
+post_sram_mgr_err:
+	cc_sram_mgr_fini(new_drvdata);
+post_fips_init_err:
+	cc_fips_fini(new_drvdata);
+post_debugfs_err:
+	cc_debugfs_fini(new_drvdata);
+post_regs_err:
+	fini_cc_regs(new_drvdata);
+post_clk_err:
+	cc_clk_off(new_drvdata);
+	return rc;
+}
+
+void fini_cc_regs(struct cc_drvdata *drvdata)
+{
+	/* Mask all interrupts */
+	cc_iowrite(drvdata, CC_REG(HOST_IMR), 0xFFFFFFFF);
+}
+
+static void cleanup_cc_resources(struct platform_device *plat_dev)
+{
+	struct cc_drvdata *drvdata =
+		(struct cc_drvdata *)platform_get_drvdata(plat_dev);
+
+	cc_aead_free(drvdata);
+	cc_hash_free(drvdata);
+	cc_cipher_free(drvdata);
+	cc_ivgen_fini(drvdata);
+	cc_pm_fini(drvdata);
+	cc_buffer_mgr_fini(drvdata);
+	cc_req_mgr_fini(drvdata);
+	cc_sram_mgr_fini(drvdata);
+	cc_fips_fini(drvdata);
+	cc_debugfs_fini(drvdata);
+	fini_cc_regs(drvdata);
+	cc_clk_off(drvdata);
+}
+
+int cc_clk_on(struct cc_drvdata *drvdata)
+{
+	struct clk *clk = drvdata->clk;
+	int rc;
+
+	if (IS_ERR(clk))
+		/* Not all devices have a clock associated with CCREE  */
+		return 0;
+
+	rc = clk_prepare_enable(clk);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+void cc_clk_off(struct cc_drvdata *drvdata)
+{
+	struct clk *clk = drvdata->clk;
+
+	if (IS_ERR(clk))
+		/* Not all devices have a clock associated with CCREE */
+		return;
+
+	clk_disable_unprepare(clk);
+}
+
+static int ccree_probe(struct platform_device *plat_dev)
+{
+	int rc;
+	struct device *dev = &plat_dev->dev;
+
+	/* Map registers space */
+	rc = init_cc_resources(plat_dev);
+	if (rc)
+		return rc;
+
+	dev_info(dev, "ARM ccree device initialized\n");
+
+	return 0;
+}
+
+static int ccree_remove(struct platform_device *plat_dev)
+{
+	struct device *dev = &plat_dev->dev;
+
+	dev_dbg(dev, "Releasing ccree resources...\n");
+
+	cleanup_cc_resources(plat_dev);
+
+	dev_info(dev, "ARM ccree device terminated\n");
+
+	return 0;
+}
+
+static struct platform_driver ccree_driver = {
+	.driver = {
+		   .name = "ccree",
+		   .of_match_table = arm_ccree_dev_of_match,
+#ifdef CONFIG_PM
+		   .pm = &ccree_pm,
+#endif
+	},
+	.probe = ccree_probe,
+	.remove = ccree_remove,
+};
+
+static int __init ccree_init(void)
+{
+	int ret;
+
+	cc_hash_global_init();
+
+	ret = cc_debugfs_global_init();
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&ccree_driver);
+}
+module_init(ccree_init);
+
+static void __exit ccree_exit(void)
+{
+	platform_driver_unregister(&ccree_driver);
+	cc_debugfs_global_fini();
+}
+module_exit(ccree_exit);
+
+/* Module description */
+MODULE_DESCRIPTION("ARM TrustZone CryptoCell REE Driver");
+MODULE_VERSION(DRV_MODULE_VERSION);
+MODULE_AUTHOR("ARM");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
new file mode 100644
index 000000000000..2048fdeb9579
--- /dev/null
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_driver.h
+ * ARM CryptoCell Linux Crypto Driver
+ */
+
+#ifndef __CC_DRIVER_H__
+#define __CC_DRIVER_H__
+
+#ifdef COMP_IN_WQ
+#include <linux/workqueue.h>
+#else
+#include <linux/interrupt.h>
+#endif
+#include <linux/dma-mapping.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aes.h>
+#include <crypto/sha.h>
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
+#include <linux/version.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+/* Registers definitions from shared/hw/ree_include */
+#include "cc_host_regs.h"
+#define CC_DEV_SHA_MAX 512
+#include "cc_crypto_ctx.h"
+#include "cc_hw_queue_defs.h"
+#include "cc_sram_mgr.h"
+
+extern bool cc_dump_desc;
+extern bool cc_dump_bytes;
+
+#define DRV_MODULE_VERSION "4.0"
+
+enum cc_hw_rev {
+	CC_HW_REV_630 = 630,
+	CC_HW_REV_710 = 710,
+	CC_HW_REV_712 = 712
+};
+
+#define CC_COHERENT_CACHE_PARAMS 0xEEE
+
+/* Maximum DMA mask supported by IP */
+#define DMA_BIT_MASK_LEN 48
+
+#define CC_AXI_IRQ_MASK ((1 << CC_AXIM_CFG_BRESPMASK_BIT_SHIFT) | \
+			  (1 << CC_AXIM_CFG_RRESPMASK_BIT_SHIFT) | \
+			  (1 << CC_AXIM_CFG_INFLTMASK_BIT_SHIFT) | \
+			  (1 << CC_AXIM_CFG_COMPMASK_BIT_SHIFT))
+
+#define CC_AXI_ERR_IRQ_MASK BIT(CC_HOST_IRR_AXI_ERR_INT_BIT_SHIFT)
+
+#define CC_COMP_IRQ_MASK BIT(CC_HOST_IRR_AXIM_COMP_INT_BIT_SHIFT)
+
+#define AXIM_MON_COMP_VALUE GENMASK(CC_AXIM_MON_COMP_VALUE_BIT_SIZE + \
+				    CC_AXIM_MON_COMP_VALUE_BIT_SHIFT, \
+				    CC_AXIM_MON_COMP_VALUE_BIT_SHIFT)
+
+/* Register name mangling macro */
+#define CC_REG(reg_name) CC_ ## reg_name ## _REG_OFFSET
+
+/* TEE FIPS status interrupt */
+#define CC_GPR0_IRQ_MASK BIT(CC_HOST_IRR_GPR0_BIT_SHIFT)
+
+#define CC_CRA_PRIO 400
+
+#define MIN_HW_QUEUE_SIZE 50 /* Minimum size required for proper function */
+
+#define MAX_REQUEST_QUEUE_SIZE 4096
+#define MAX_MLLI_BUFF_SIZE 2080
+#define MAX_ICV_NENTS_SUPPORTED 2
+
+/* Definitions for HW descriptors DIN/DOUT fields */
+#define NS_BIT 1
+#define AXI_ID 0
+/* AXI_ID is not actually the AXI ID of the transaction but the value of AXI_ID
+ * field in the HW descriptor. The DMA engine +8 that value.
+ */
+
+#define CC_MAX_IVGEN_DMA_ADDRESSES	3
+struct cc_crypto_req {
+	void (*user_cb)(struct device *dev, void *req, int err);
+	void *user_arg;
+	dma_addr_t ivgen_dma_addr[CC_MAX_IVGEN_DMA_ADDRESSES];
+	/* For the first 'ivgen_dma_addr_len' addresses of this array,
+	 * generated IV would be placed in it by send_request().
+	 * Same generated IV for all addresses!
+	 */
+	/* Amount of 'ivgen_dma_addr' elements to be filled. */
+	unsigned int ivgen_dma_addr_len;
+	/* The generated IV size required, 8/16 B allowed. */
+	unsigned int ivgen_size;
+	struct completion seq_compl; /* request completion */
+};
+
+/**
+ * struct cc_drvdata - driver private data context
+ * @cc_base:	virt address of the CC registers
+ * @irq:	device IRQ number
+ * @irq_mask:	Interrupt mask shadow (1 for masked interrupts)
+ * @fw_ver:	SeP loaded firmware version
+ */
+struct cc_drvdata {
+	void __iomem *cc_base;
+	int irq;
+	u32 irq_mask;
+	u32 fw_ver;
+	struct completion hw_queue_avail; /* wait for HW queue availability */
+	struct platform_device *plat_dev;
+	cc_sram_addr_t mlli_sram_addr;
+	void *buff_mgr_handle;
+	void *cipher_handle;
+	void *hash_handle;
+	void *aead_handle;
+	void *request_mgr_handle;
+	void *fips_handle;
+	void *ivgen_handle;
+	void *sram_mgr_handle;
+	void *debugfs;
+	struct clk *clk;
+	bool coherent;
+	char *hw_rev_name;
+	enum cc_hw_rev hw_rev;
+	u32 hash_len_sz;
+	u32 axim_mon_offset;
+};
+
+struct cc_crypto_alg {
+	struct list_head entry;
+	int cipher_mode;
+	int flow_mode; /* Note: currently, refers to the cipher mode only. */
+	int auth_mode;
+	unsigned int data_unit;
+	struct cc_drvdata *drvdata;
+	struct skcipher_alg skcipher_alg;
+	struct aead_alg aead_alg;
+};
+
+struct cc_alg_template {
+	char name[CRYPTO_MAX_ALG_NAME];
+	char driver_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	u32 type;
+	union {
+		struct skcipher_alg skcipher;
+		struct aead_alg aead;
+	} template_u;
+	int cipher_mode;
+	int flow_mode; /* Note: currently, refers to the cipher mode only. */
+	int auth_mode;
+	u32 min_hw_rev;
+	unsigned int data_unit;
+	struct cc_drvdata *drvdata;
+};
+
+struct async_gen_req_ctx {
+	dma_addr_t iv_dma_addr;
+	enum drv_crypto_direction op_type;
+};
+
+static inline struct device *drvdata_to_dev(struct cc_drvdata *drvdata)
+{
+	return &drvdata->plat_dev->dev;
+}
+
+void __dump_byte_array(const char *name, const u8 *buf, size_t len);
+static inline void dump_byte_array(const char *name, const u8 *the_array,
+				   size_t size)
+{
+	if (cc_dump_bytes)
+		__dump_byte_array(name, the_array, size);
+}
+
+int init_cc_regs(struct cc_drvdata *drvdata, bool is_probe);
+void fini_cc_regs(struct cc_drvdata *drvdata);
+int cc_clk_on(struct cc_drvdata *drvdata);
+void cc_clk_off(struct cc_drvdata *drvdata);
+
+static inline void cc_iowrite(struct cc_drvdata *drvdata, u32 reg, u32 val)
+{
+	iowrite32(val, (drvdata->cc_base + reg));
+}
+
+static inline u32 cc_ioread(struct cc_drvdata *drvdata, u32 reg)
+{
+	return ioread32(drvdata->cc_base + reg);
+}
+
+static inline gfp_t cc_gfp_flags(struct crypto_async_request *req)
+{
+	return (req->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+			GFP_KERNEL : GFP_ATOMIC;
+}
+
+static inline void set_queue_last_ind(struct cc_drvdata *drvdata,
+				      struct cc_hw_desc *pdesc)
+{
+	if (drvdata->hw_rev >= CC_HW_REV_712)
+		set_queue_last_ind_bit(pdesc);
+}
+
+#endif /*__CC_DRIVER_H__*/
diff --git a/drivers/crypto/ccree/cc_fips.c b/drivers/crypto/ccree/cc_fips.c
new file mode 100644
index 000000000000..b4d0a6d983e0
--- /dev/null
+++ b/drivers/crypto/ccree/cc_fips.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/fips.h>
+
+#include "cc_driver.h"
+#include "cc_fips.h"
+
+static void fips_dsr(unsigned long devarg);
+
+struct cc_fips_handle {
+	struct tasklet_struct tasklet;
+};
+
+/* The function called once at driver entry point to check
+ * whether TEE FIPS error occurred.
+ */
+static bool cc_get_tee_fips_status(struct cc_drvdata *drvdata)
+{
+	u32 reg;
+
+	reg = cc_ioread(drvdata, CC_REG(GPR_HOST));
+	return (reg == (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK));
+}
+
+/*
+ * This function should push the FIPS REE library status towards the TEE library
+ * by writing the error state to HOST_GPR0 register.
+ */
+void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool status)
+{
+	int val = CC_FIPS_SYNC_REE_STATUS;
+
+	if (drvdata->hw_rev < CC_HW_REV_712)
+		return;
+
+	val |= (status ? CC_FIPS_SYNC_MODULE_OK : CC_FIPS_SYNC_MODULE_ERROR);
+
+	cc_iowrite(drvdata, CC_REG(HOST_GPR0), val);
+}
+
+void cc_fips_fini(struct cc_drvdata *drvdata)
+{
+	struct cc_fips_handle *fips_h = drvdata->fips_handle;
+
+	if (drvdata->hw_rev < CC_HW_REV_712 || !fips_h)
+		return;
+
+	/* Kill tasklet */
+	tasklet_kill(&fips_h->tasklet);
+
+	kfree(fips_h);
+	drvdata->fips_handle = NULL;
+}
+
+void fips_handler(struct cc_drvdata *drvdata)
+{
+	struct cc_fips_handle *fips_handle_ptr = drvdata->fips_handle;
+
+	if (drvdata->hw_rev < CC_HW_REV_712)
+		return;
+
+	tasklet_schedule(&fips_handle_ptr->tasklet);
+}
+
+static inline void tee_fips_error(struct device *dev)
+{
+	if (fips_enabled)
+		panic("ccree: TEE reported cryptographic error in fips mode!\n");
+	else
+		dev_err(dev, "TEE reported error!\n");
+}
+
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void fips_dsr(unsigned long devarg)
+{
+	struct cc_drvdata *drvdata = (struct cc_drvdata *)devarg;
+	struct device *dev = drvdata_to_dev(drvdata);
+	u32 irq, state, val;
+
+	irq = (drvdata->irq & (CC_GPR0_IRQ_MASK));
+
+	if (irq) {
+		state = cc_ioread(drvdata, CC_REG(GPR_HOST));
+
+		if (state != (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK))
+			tee_fips_error(dev);
+	}
+
+	/* after verifing that there is nothing to do,
+	 * unmask AXI completion interrupt.
+	 */
+	val = (CC_REG(HOST_IMR) & ~irq);
+	cc_iowrite(drvdata, CC_REG(HOST_IMR), val);
+}
+
+/* The function called once at driver entry point .*/
+int cc_fips_init(struct cc_drvdata *p_drvdata)
+{
+	struct cc_fips_handle *fips_h;
+	struct device *dev = drvdata_to_dev(p_drvdata);
+
+	if (p_drvdata->hw_rev < CC_HW_REV_712)
+		return 0;
+
+	fips_h = kzalloc(sizeof(*fips_h), GFP_KERNEL);
+	if (!fips_h)
+		return -ENOMEM;
+
+	p_drvdata->fips_handle = fips_h;
+
+	dev_dbg(dev, "Initializing fips tasklet\n");
+	tasklet_init(&fips_h->tasklet, fips_dsr, (unsigned long)p_drvdata);
+
+	if (!cc_get_tee_fips_status(p_drvdata))
+		tee_fips_error(dev);
+
+	return 0;
+}
diff --git a/drivers/crypto/ccree/cc_fips.h b/drivers/crypto/ccree/cc_fips.h
new file mode 100644
index 000000000000..645e096a7a82
--- /dev/null
+++ b/drivers/crypto/ccree/cc_fips.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_FIPS_H__
+#define __CC_FIPS_H__
+
+#ifdef CONFIG_CRYPTO_FIPS
+
+enum cc_fips_status {
+	CC_FIPS_SYNC_MODULE_OK = 0x0,
+	CC_FIPS_SYNC_MODULE_ERROR = 0x1,
+	CC_FIPS_SYNC_REE_STATUS = 0x4,
+	CC_FIPS_SYNC_TEE_STATUS = 0x8,
+	CC_FIPS_SYNC_STATUS_RESERVE32B = S32_MAX
+};
+
+int cc_fips_init(struct cc_drvdata *p_drvdata);
+void cc_fips_fini(struct cc_drvdata *drvdata);
+void fips_handler(struct cc_drvdata *drvdata);
+void cc_set_ree_fips_status(struct cc_drvdata *drvdata, bool ok);
+
+#else  /* CONFIG_CRYPTO_FIPS */
+
+static inline int cc_fips_init(struct cc_drvdata *p_drvdata)
+{
+	return 0;
+}
+
+static inline void cc_fips_fini(struct cc_drvdata *drvdata) {}
+static inline void cc_set_ree_fips_status(struct cc_drvdata *drvdata,
+					  bool ok) {}
+static inline void fips_handler(struct cc_drvdata *drvdata) {}
+
+#endif /* CONFIG_CRYPTO_FIPS */
+
+#endif  /*__CC_FIPS_H__*/
diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c
new file mode 100644
index 000000000000..96ff777474d7
--- /dev/null
+++ b/drivers/crypto/ccree/cc_hash.c
@@ -0,0 +1,2296 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/md5.h>
+#include <crypto/internal/hash.h>
+
+#include "cc_driver.h"
+#include "cc_request_mgr.h"
+#include "cc_buffer_mgr.h"
+#include "cc_hash.h"
+#include "cc_sram_mgr.h"
+
+#define CC_MAX_HASH_SEQ_LEN 12
+#define CC_MAX_OPAD_KEYS_SIZE CC_MAX_HASH_BLCK_SIZE
+
+struct cc_hash_handle {
+	cc_sram_addr_t digest_len_sram_addr; /* const value in SRAM*/
+	cc_sram_addr_t larval_digest_sram_addr;   /* const value in SRAM */
+	struct list_head hash_list;
+};
+
+static const u32 digest_len_init[] = {
+	0x00000040, 0x00000000, 0x00000000, 0x00000000 };
+static const u32 md5_init[] = {
+	SHA1_H3, SHA1_H2, SHA1_H1, SHA1_H0 };
+static const u32 sha1_init[] = {
+	SHA1_H4, SHA1_H3, SHA1_H2, SHA1_H1, SHA1_H0 };
+static const u32 sha224_init[] = {
+	SHA224_H7, SHA224_H6, SHA224_H5, SHA224_H4,
+	SHA224_H3, SHA224_H2, SHA224_H1, SHA224_H0 };
+static const u32 sha256_init[] = {
+	SHA256_H7, SHA256_H6, SHA256_H5, SHA256_H4,
+	SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 };
+static const u32 digest_len_sha512_init[] = {
+	0x00000080, 0x00000000, 0x00000000, 0x00000000 };
+static u64 sha384_init[] = {
+	SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4,
+	SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 };
+static u64 sha512_init[] = {
+	SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4,
+	SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 };
+
+static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[],
+			  unsigned int *seq_size);
+
+static void cc_setup_cmac(struct ahash_request *areq, struct cc_hw_desc desc[],
+			  unsigned int *seq_size);
+
+static const void *cc_larval_digest(struct device *dev, u32 mode);
+
+struct cc_hash_alg {
+	struct list_head entry;
+	int hash_mode;
+	int hw_mode;
+	int inter_digestsize;
+	struct cc_drvdata *drvdata;
+	struct ahash_alg ahash_alg;
+};
+
+struct hash_key_req_ctx {
+	u32 keylen;
+	dma_addr_t key_dma_addr;
+};
+
+/* hash per-session context */
+struct cc_hash_ctx {
+	struct cc_drvdata *drvdata;
+	/* holds the origin digest; the digest after "setkey" if HMAC,*
+	 * the initial digest if HASH.
+	 */
+	u8 digest_buff[CC_MAX_HASH_DIGEST_SIZE]  ____cacheline_aligned;
+	u8 opad_tmp_keys_buff[CC_MAX_OPAD_KEYS_SIZE]  ____cacheline_aligned;
+
+	dma_addr_t opad_tmp_keys_dma_addr  ____cacheline_aligned;
+	dma_addr_t digest_buff_dma_addr;
+	/* use for hmac with key large then mode block size */
+	struct hash_key_req_ctx key_params;
+	int hash_mode;
+	int hw_mode;
+	int inter_digestsize;
+	struct completion setkey_comp;
+	bool is_hmac;
+};
+
+static void cc_set_desc(struct ahash_req_ctx *areq_ctx, struct cc_hash_ctx *ctx,
+			unsigned int flow_mode, struct cc_hw_desc desc[],
+			bool is_not_last_data, unsigned int *seq_size);
+
+static void cc_set_endianity(u32 mode, struct cc_hw_desc *desc)
+{
+	if (mode == DRV_HASH_MD5 || mode == DRV_HASH_SHA384 ||
+	    mode == DRV_HASH_SHA512) {
+		set_bytes_swap(desc, 1);
+	} else {
+		set_cipher_config0(desc, HASH_DIGEST_RESULT_LITTLE_ENDIAN);
+	}
+}
+
+static int cc_map_result(struct device *dev, struct ahash_req_ctx *state,
+			 unsigned int digestsize)
+{
+	state->digest_result_dma_addr =
+		dma_map_single(dev, state->digest_result_buff,
+			       digestsize, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, state->digest_result_dma_addr)) {
+		dev_err(dev, "Mapping digest result buffer %u B for DMA failed\n",
+			digestsize);
+		return -ENOMEM;
+	}
+	dev_dbg(dev, "Mapped digest result buffer %u B at va=%pK to dma=%pad\n",
+		digestsize, state->digest_result_buff,
+		&state->digest_result_dma_addr);
+
+	return 0;
+}
+
+static void cc_init_req(struct device *dev, struct ahash_req_ctx *state,
+			struct cc_hash_ctx *ctx)
+{
+	bool is_hmac = ctx->is_hmac;
+
+	memset(state, 0, sizeof(*state));
+
+	if (is_hmac) {
+		if (ctx->hw_mode != DRV_CIPHER_XCBC_MAC &&
+		    ctx->hw_mode != DRV_CIPHER_CMAC) {
+			dma_sync_single_for_cpu(dev, ctx->digest_buff_dma_addr,
+						ctx->inter_digestsize,
+						DMA_BIDIRECTIONAL);
+
+			memcpy(state->digest_buff, ctx->digest_buff,
+			       ctx->inter_digestsize);
+			if (ctx->hash_mode == DRV_HASH_SHA512 ||
+			    ctx->hash_mode == DRV_HASH_SHA384)
+				memcpy(state->digest_bytes_len,
+				       digest_len_sha512_init,
+				       ctx->drvdata->hash_len_sz);
+			else
+				memcpy(state->digest_bytes_len, digest_len_init,
+				       ctx->drvdata->hash_len_sz);
+		}
+
+		if (ctx->hash_mode != DRV_HASH_NULL) {
+			dma_sync_single_for_cpu(dev,
+						ctx->opad_tmp_keys_dma_addr,
+						ctx->inter_digestsize,
+						DMA_BIDIRECTIONAL);
+			memcpy(state->opad_digest_buff,
+			       ctx->opad_tmp_keys_buff, ctx->inter_digestsize);
+		}
+	} else { /*hash*/
+		/* Copy the initial digests if hash flow. */
+		const void *larval = cc_larval_digest(dev, ctx->hash_mode);
+
+		memcpy(state->digest_buff, larval, ctx->inter_digestsize);
+	}
+}
+
+static int cc_map_req(struct device *dev, struct ahash_req_ctx *state,
+		      struct cc_hash_ctx *ctx)
+{
+	bool is_hmac = ctx->is_hmac;
+
+	state->digest_buff_dma_addr =
+		dma_map_single(dev, state->digest_buff,
+			       ctx->inter_digestsize, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, state->digest_buff_dma_addr)) {
+		dev_err(dev, "Mapping digest len %d B at va=%pK for DMA failed\n",
+			ctx->inter_digestsize, state->digest_buff);
+		return -EINVAL;
+	}
+	dev_dbg(dev, "Mapped digest %d B at va=%pK to dma=%pad\n",
+		ctx->inter_digestsize, state->digest_buff,
+		&state->digest_buff_dma_addr);
+
+	if (ctx->hw_mode != DRV_CIPHER_XCBC_MAC) {
+		state->digest_bytes_len_dma_addr =
+			dma_map_single(dev, state->digest_bytes_len,
+				       HASH_MAX_LEN_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, state->digest_bytes_len_dma_addr)) {
+			dev_err(dev, "Mapping digest len %u B at va=%pK for DMA failed\n",
+				HASH_MAX_LEN_SIZE, state->digest_bytes_len);
+			goto unmap_digest_buf;
+		}
+		dev_dbg(dev, "Mapped digest len %u B at va=%pK to dma=%pad\n",
+			HASH_MAX_LEN_SIZE, state->digest_bytes_len,
+			&state->digest_bytes_len_dma_addr);
+	}
+
+	if (is_hmac && ctx->hash_mode != DRV_HASH_NULL) {
+		state->opad_digest_dma_addr =
+			dma_map_single(dev, state->opad_digest_buff,
+				       ctx->inter_digestsize,
+				       DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, state->opad_digest_dma_addr)) {
+			dev_err(dev, "Mapping opad digest %d B at va=%pK for DMA failed\n",
+				ctx->inter_digestsize,
+				state->opad_digest_buff);
+			goto unmap_digest_len;
+		}
+		dev_dbg(dev, "Mapped opad digest %d B at va=%pK to dma=%pad\n",
+			ctx->inter_digestsize, state->opad_digest_buff,
+			&state->opad_digest_dma_addr);
+	}
+
+	return 0;
+
+unmap_digest_len:
+	if (state->digest_bytes_len_dma_addr) {
+		dma_unmap_single(dev, state->digest_bytes_len_dma_addr,
+				 HASH_MAX_LEN_SIZE, DMA_BIDIRECTIONAL);
+		state->digest_bytes_len_dma_addr = 0;
+	}
+unmap_digest_buf:
+	if (state->digest_buff_dma_addr) {
+		dma_unmap_single(dev, state->digest_buff_dma_addr,
+				 ctx->inter_digestsize, DMA_BIDIRECTIONAL);
+		state->digest_buff_dma_addr = 0;
+	}
+
+	return -EINVAL;
+}
+
+static void cc_unmap_req(struct device *dev, struct ahash_req_ctx *state,
+			 struct cc_hash_ctx *ctx)
+{
+	if (state->digest_buff_dma_addr) {
+		dma_unmap_single(dev, state->digest_buff_dma_addr,
+				 ctx->inter_digestsize, DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped digest-buffer: digest_buff_dma_addr=%pad\n",
+			&state->digest_buff_dma_addr);
+		state->digest_buff_dma_addr = 0;
+	}
+	if (state->digest_bytes_len_dma_addr) {
+		dma_unmap_single(dev, state->digest_bytes_len_dma_addr,
+				 HASH_MAX_LEN_SIZE, DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped digest-bytes-len buffer: digest_bytes_len_dma_addr=%pad\n",
+			&state->digest_bytes_len_dma_addr);
+		state->digest_bytes_len_dma_addr = 0;
+	}
+	if (state->opad_digest_dma_addr) {
+		dma_unmap_single(dev, state->opad_digest_dma_addr,
+				 ctx->inter_digestsize, DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped opad-digest: opad_digest_dma_addr=%pad\n",
+			&state->opad_digest_dma_addr);
+		state->opad_digest_dma_addr = 0;
+	}
+}
+
+static void cc_unmap_result(struct device *dev, struct ahash_req_ctx *state,
+			    unsigned int digestsize, u8 *result)
+{
+	if (state->digest_result_dma_addr) {
+		dma_unmap_single(dev, state->digest_result_dma_addr, digestsize,
+				 DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "unmpa digest result buffer va (%pK) pa (%pad) len %u\n",
+			state->digest_result_buff,
+			&state->digest_result_dma_addr, digestsize);
+		memcpy(result, state->digest_result_buff, digestsize);
+	}
+	state->digest_result_dma_addr = 0;
+}
+
+static void cc_update_complete(struct device *dev, void *cc_req, int err)
+{
+	struct ahash_request *req = (struct ahash_request *)cc_req;
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	dev_dbg(dev, "req=%pK\n", req);
+
+	cc_unmap_hash_request(dev, state, req->src, false);
+	cc_unmap_req(dev, state, ctx);
+	req->base.complete(&req->base, err);
+}
+
+static void cc_digest_complete(struct device *dev, void *cc_req, int err)
+{
+	struct ahash_request *req = (struct ahash_request *)cc_req;
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+
+	dev_dbg(dev, "req=%pK\n", req);
+
+	cc_unmap_hash_request(dev, state, req->src, false);
+	cc_unmap_result(dev, state, digestsize, req->result);
+	cc_unmap_req(dev, state, ctx);
+	req->base.complete(&req->base, err);
+}
+
+static void cc_hash_complete(struct device *dev, void *cc_req, int err)
+{
+	struct ahash_request *req = (struct ahash_request *)cc_req;
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+
+	dev_dbg(dev, "req=%pK\n", req);
+
+	cc_unmap_hash_request(dev, state, req->src, false);
+	cc_unmap_result(dev, state, digestsize, req->result);
+	cc_unmap_req(dev, state, ctx);
+	req->base.complete(&req->base, err);
+}
+
+static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req,
+			 int idx)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+
+	/* Get final MAC result */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	/* TODO */
+	set_dout_dlli(&desc[idx], state->digest_result_dma_addr, digestsize,
+		      NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+	cc_set_endianity(ctx->hash_mode, &desc[idx]);
+	idx++;
+
+	return idx;
+}
+
+static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req,
+		       int idx)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+
+	/* store the hash digest result in the context */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_dout_dlli(&desc[idx], state->digest_buff_dma_addr, digestsize,
+		      NS_BIT, 0);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	cc_set_endianity(ctx->hash_mode, &desc[idx]);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	idx++;
+
+	/* Loading hash opad xor key state */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_din_type(&desc[idx], DMA_DLLI, state->opad_digest_dma_addr,
+		     ctx->inter_digestsize, NS_BIT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	/* Load the hash current length */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_din_sram(&desc[idx],
+		     cc_digest_len_addr(ctx->drvdata, ctx->hash_mode),
+		     ctx->drvdata->hash_len_sz);
+	set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	/* Memory Barrier: wait for IPAD/OPAD axi write to complete */
+	hw_desc_init(&desc[idx]);
+	set_din_no_dma(&desc[idx], 0, 0xfffff0);
+	set_dout_no_dma(&desc[idx], 0, 0, 1);
+	idx++;
+
+	/* Perform HASH update */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+		     digestsize, NS_BIT);
+	set_flow_mode(&desc[idx], DIN_HASH);
+	idx++;
+
+	return idx;
+}
+
+static int cc_hash_digest(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	struct scatterlist *src = req->src;
+	unsigned int nbytes = req->nbytes;
+	u8 *result = req->result;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	bool is_hmac = ctx->is_hmac;
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	cc_sram_addr_t larval_digest_addr =
+		cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode);
+	int idx = 0;
+	int rc = 0;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== %s-digest (%d) ====\n", is_hmac ? "hmac" : "hash",
+		nbytes);
+
+	cc_init_req(dev, state, ctx);
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -ENOMEM;
+	}
+
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, src, nbytes, 1,
+				      flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_result(dev, state, digestsize, result);
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = cc_digest_complete;
+	cc_req.user_arg = req;
+
+	/* If HMAC then load hash IPAD xor key, if HASH then load initial
+	 * digest
+	 */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	if (is_hmac) {
+		set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+			     ctx->inter_digestsize, NS_BIT);
+	} else {
+		set_din_sram(&desc[idx], larval_digest_addr,
+			     ctx->inter_digestsize);
+	}
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	/* Load the hash current length */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+
+	if (is_hmac) {
+		set_din_type(&desc[idx], DMA_DLLI,
+			     state->digest_bytes_len_dma_addr,
+			     ctx->drvdata->hash_len_sz, NS_BIT);
+	} else {
+		set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz);
+		if (nbytes)
+			set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+		else
+			set_cipher_do(&desc[idx], DO_PAD);
+	}
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	cc_set_desc(state, ctx, DIN_HASH, desc, false, &idx);
+
+	if (is_hmac) {
+		/* HW last hash block padding (aka. "DO_PAD") */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_dout_dlli(&desc[idx], state->digest_buff_dma_addr,
+			      ctx->drvdata->hash_len_sz, NS_BIT, 0);
+		set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+		set_setup_mode(&desc[idx], SETUP_WRITE_STATE1);
+		set_cipher_do(&desc[idx], DO_PAD);
+		idx++;
+
+		idx = cc_fin_hmac(desc, req, idx);
+	}
+
+	idx = cc_fin_result(desc, req, idx);
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_result(dev, state, digestsize, result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx,
+			   struct ahash_req_ctx *state, unsigned int idx)
+{
+	/* Restore hash digest */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+		     ctx->inter_digestsize, NS_BIT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	idx++;
+
+	/* Restore hash current length */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+	set_din_type(&desc[idx], DMA_DLLI, state->digest_bytes_len_dma_addr,
+		     ctx->drvdata->hash_len_sz, NS_BIT);
+	set_flow_mode(&desc[idx], S_DIN_to_HASH);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	cc_set_desc(state, ctx, DIN_HASH, desc, false, &idx);
+
+	return idx;
+}
+
+static int cc_hash_update(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base);
+	struct scatterlist *src = req->src;
+	unsigned int nbytes = req->nbytes;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	u32 idx = 0;
+	int rc;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== %s-update (%d) ====\n", ctx->is_hmac ?
+		"hmac" : "hash", nbytes);
+
+	if (nbytes == 0) {
+		/* no real updates required */
+		return 0;
+	}
+
+	rc = cc_map_hash_request_update(ctx->drvdata, state, src, nbytes,
+					block_size, flags);
+	if (rc) {
+		if (rc == 1) {
+			dev_dbg(dev, " data size not require HW update %x\n",
+				nbytes);
+			/* No hardware updates are required */
+			return 0;
+		}
+		dev_err(dev, "map_ahash_request_update() failed\n");
+		return -ENOMEM;
+	}
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		cc_unmap_hash_request(dev, state, src, true);
+		return -EINVAL;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = cc_update_complete;
+	cc_req.user_arg = req;
+
+	idx = cc_restore_hash(desc, ctx, state, idx);
+
+	/* store the hash digest result in context */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_dout_dlli(&desc[idx], state->digest_buff_dma_addr,
+		      ctx->inter_digestsize, NS_BIT, 0);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	idx++;
+
+	/* store current hash length in context */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_dout_dlli(&desc[idx], state->digest_bytes_len_dma_addr,
+		      ctx->drvdata->hash_len_sz, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE1);
+	idx++;
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_hash_finup(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	struct scatterlist *src = req->src;
+	unsigned int nbytes = req->nbytes;
+	u8 *result = req->result;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	bool is_hmac = ctx->is_hmac;
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	unsigned int idx = 0;
+	int rc;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== %s-finup (%d) ====\n", is_hmac ? "hmac" : "hash",
+		nbytes);
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -EINVAL;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, src, nbytes, 1,
+				      flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = cc_hash_complete;
+	cc_req.user_arg = req;
+
+	idx = cc_restore_hash(desc, ctx, state, idx);
+
+	if (is_hmac)
+		idx = cc_fin_hmac(desc, req, idx);
+
+	idx = cc_fin_result(desc, req, idx);
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_result(dev, state, digestsize, result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_hash_final(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	struct scatterlist *src = req->src;
+	unsigned int nbytes = req->nbytes;
+	u8 *result = req->result;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	bool is_hmac = ctx->is_hmac;
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	unsigned int idx = 0;
+	int rc;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== %s-final (%d) ====\n", is_hmac ? "hmac" : "hash",
+		nbytes);
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -EINVAL;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, src, nbytes, 0,
+				      flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = cc_hash_complete;
+	cc_req.user_arg = req;
+
+	idx = cc_restore_hash(desc, ctx, state, idx);
+
+	/* "DO-PAD" must be enabled only when writing current length to HW */
+	hw_desc_init(&desc[idx]);
+	set_cipher_do(&desc[idx], DO_PAD);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_dout_dlli(&desc[idx], state->digest_bytes_len_dma_addr,
+		      ctx->drvdata->hash_len_sz, NS_BIT, 0);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE1);
+	set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+	idx++;
+
+	if (is_hmac)
+		idx = cc_fin_hmac(desc, req, idx);
+
+	idx = cc_fin_result(desc, req, idx);
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, src, true);
+		cc_unmap_result(dev, state, digestsize, result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_hash_init(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "===== init (%d) ====\n", req->nbytes);
+
+	cc_init_req(dev, state, ctx);
+
+	return 0;
+}
+
+static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key,
+			  unsigned int keylen)
+{
+	unsigned int hmac_pad_const[2] = { HMAC_IPAD_CONST, HMAC_OPAD_CONST };
+	struct cc_crypto_req cc_req = {};
+	struct cc_hash_ctx *ctx = NULL;
+	int blocksize = 0;
+	int digestsize = 0;
+	int i, idx = 0, rc = 0;
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	cc_sram_addr_t larval_addr;
+	struct device *dev;
+
+	ctx = crypto_ahash_ctx(ahash);
+	dev = drvdata_to_dev(ctx->drvdata);
+	dev_dbg(dev, "start keylen: %d", keylen);
+
+	blocksize = crypto_tfm_alg_blocksize(&ahash->base);
+	digestsize = crypto_ahash_digestsize(ahash);
+
+	larval_addr = cc_larval_digest_addr(ctx->drvdata, ctx->hash_mode);
+
+	/* The keylen value distinguishes HASH in case keylen is ZERO bytes,
+	 * any NON-ZERO value utilizes HMAC flow
+	 */
+	ctx->key_params.keylen = keylen;
+	ctx->key_params.key_dma_addr = 0;
+	ctx->is_hmac = true;
+
+	if (keylen) {
+		ctx->key_params.key_dma_addr =
+			dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) {
+			dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n",
+				key, keylen);
+			return -ENOMEM;
+		}
+		dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n",
+			&ctx->key_params.key_dma_addr, ctx->key_params.keylen);
+
+		if (keylen > blocksize) {
+			/* Load hash initial state */
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], ctx->hw_mode);
+			set_din_sram(&desc[idx], larval_addr,
+				     ctx->inter_digestsize);
+			set_flow_mode(&desc[idx], S_DIN_to_HASH);
+			set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+			idx++;
+
+			/* Load the hash current length*/
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], ctx->hw_mode);
+			set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz);
+			set_cipher_config1(&desc[idx], HASH_PADDING_ENABLED);
+			set_flow_mode(&desc[idx], S_DIN_to_HASH);
+			set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+			idx++;
+
+			hw_desc_init(&desc[idx]);
+			set_din_type(&desc[idx], DMA_DLLI,
+				     ctx->key_params.key_dma_addr, keylen,
+				     NS_BIT);
+			set_flow_mode(&desc[idx], DIN_HASH);
+			idx++;
+
+			/* Get hashed key */
+			hw_desc_init(&desc[idx]);
+			set_cipher_mode(&desc[idx], ctx->hw_mode);
+			set_dout_dlli(&desc[idx], ctx->opad_tmp_keys_dma_addr,
+				      digestsize, NS_BIT, 0);
+			set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+			set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+			set_cipher_config1(&desc[idx], HASH_PADDING_DISABLED);
+			cc_set_endianity(ctx->hash_mode, &desc[idx]);
+			idx++;
+
+			hw_desc_init(&desc[idx]);
+			set_din_const(&desc[idx], 0, (blocksize - digestsize));
+			set_flow_mode(&desc[idx], BYPASS);
+			set_dout_dlli(&desc[idx],
+				      (ctx->opad_tmp_keys_dma_addr +
+				       digestsize),
+				      (blocksize - digestsize), NS_BIT, 0);
+			idx++;
+		} else {
+			hw_desc_init(&desc[idx]);
+			set_din_type(&desc[idx], DMA_DLLI,
+				     ctx->key_params.key_dma_addr, keylen,
+				     NS_BIT);
+			set_flow_mode(&desc[idx], BYPASS);
+			set_dout_dlli(&desc[idx], ctx->opad_tmp_keys_dma_addr,
+				      keylen, NS_BIT, 0);
+			idx++;
+
+			if ((blocksize - keylen)) {
+				hw_desc_init(&desc[idx]);
+				set_din_const(&desc[idx], 0,
+					      (blocksize - keylen));
+				set_flow_mode(&desc[idx], BYPASS);
+				set_dout_dlli(&desc[idx],
+					      (ctx->opad_tmp_keys_dma_addr +
+					       keylen), (blocksize - keylen),
+					      NS_BIT, 0);
+				idx++;
+			}
+		}
+	} else {
+		hw_desc_init(&desc[idx]);
+		set_din_const(&desc[idx], 0, blocksize);
+		set_flow_mode(&desc[idx], BYPASS);
+		set_dout_dlli(&desc[idx], (ctx->opad_tmp_keys_dma_addr),
+			      blocksize, NS_BIT, 0);
+		idx++;
+	}
+
+	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
+	if (rc) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		goto out;
+	}
+
+	/* calc derived HMAC key */
+	for (idx = 0, i = 0; i < 2; i++) {
+		/* Load hash initial state */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_din_sram(&desc[idx], larval_addr, ctx->inter_digestsize);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+		idx++;
+
+		/* Load the hash current length*/
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_din_const(&desc[idx], 0, ctx->drvdata->hash_len_sz);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+		idx++;
+
+		/* Prepare ipad key */
+		hw_desc_init(&desc[idx]);
+		set_xor_val(&desc[idx], hmac_pad_const[i]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_flow_mode(&desc[idx], S_DIN_to_HASH);
+		set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+		idx++;
+
+		/* Perform HASH update */
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI, ctx->opad_tmp_keys_dma_addr,
+			     blocksize, NS_BIT);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_xor_active(&desc[idx]);
+		set_flow_mode(&desc[idx], DIN_HASH);
+		idx++;
+
+		/* Get the IPAD/OPAD xor key (Note, IPAD is the initial digest
+		 * of the first HASH "update" state)
+		 */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		if (i > 0) /* Not first iteration */
+			set_dout_dlli(&desc[idx], ctx->opad_tmp_keys_dma_addr,
+				      ctx->inter_digestsize, NS_BIT, 0);
+		else /* First iteration */
+			set_dout_dlli(&desc[idx], ctx->digest_buff_dma_addr,
+				      ctx->inter_digestsize, NS_BIT, 0);
+		set_flow_mode(&desc[idx], S_HASH_to_DOUT);
+		set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+		idx++;
+	}
+
+	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
+
+out:
+	if (rc)
+		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+	if (ctx->key_params.key_dma_addr) {
+		dma_unmap_single(dev, ctx->key_params.key_dma_addr,
+				 ctx->key_params.keylen, DMA_TO_DEVICE);
+		dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n",
+			&ctx->key_params.key_dma_addr, ctx->key_params.keylen);
+	}
+	return rc;
+}
+
+static int cc_xcbc_setkey(struct crypto_ahash *ahash,
+			  const u8 *key, unsigned int keylen)
+{
+	struct cc_crypto_req cc_req = {};
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	int rc = 0;
+	unsigned int idx = 0;
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+
+	dev_dbg(dev, "===== setkey (%d) ====\n", keylen);
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_192:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx->key_params.keylen = keylen;
+
+	ctx->key_params.key_dma_addr =
+		dma_map_single(dev, (void *)key, keylen, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, ctx->key_params.key_dma_addr)) {
+		dev_err(dev, "Mapping key va=0x%p len=%u for DMA failed\n",
+			key, keylen);
+		return -ENOMEM;
+	}
+	dev_dbg(dev, "mapping key-buffer: key_dma_addr=%pad keylen=%u\n",
+		&ctx->key_params.key_dma_addr, ctx->key_params.keylen);
+
+	ctx->is_hmac = true;
+	/* 1. Load the AES key */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->key_params.key_dma_addr,
+		     keylen, NS_BIT);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_ECB);
+	set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_ENCRYPT);
+	set_key_size_aes(&desc[idx], keylen);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	idx++;
+
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0x01010101, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	set_dout_dlli(&desc[idx],
+		      (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K1_OFFSET),
+		      CC_AES_128_BIT_KEY_SIZE, NS_BIT, 0);
+	idx++;
+
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0x02020202, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	set_dout_dlli(&desc[idx],
+		      (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K2_OFFSET),
+		      CC_AES_128_BIT_KEY_SIZE, NS_BIT, 0);
+	idx++;
+
+	hw_desc_init(&desc[idx]);
+	set_din_const(&desc[idx], 0x03030303, CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], DIN_AES_DOUT);
+	set_dout_dlli(&desc[idx],
+		      (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K3_OFFSET),
+		      CC_AES_128_BIT_KEY_SIZE, NS_BIT, 0);
+	idx++;
+
+	rc = cc_send_sync_request(ctx->drvdata, &cc_req, desc, idx);
+
+	if (rc)
+		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+	dma_unmap_single(dev, ctx->key_params.key_dma_addr,
+			 ctx->key_params.keylen, DMA_TO_DEVICE);
+	dev_dbg(dev, "Unmapped key-buffer: key_dma_addr=%pad keylen=%u\n",
+		&ctx->key_params.key_dma_addr, ctx->key_params.keylen);
+
+	return rc;
+}
+
+static int cc_cmac_setkey(struct crypto_ahash *ahash,
+			  const u8 *key, unsigned int keylen)
+{
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "===== setkey (%d) ====\n", keylen);
+
+	ctx->is_hmac = true;
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_192:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx->key_params.keylen = keylen;
+
+	/* STAT_PHASE_1: Copy key to ctx */
+
+	dma_sync_single_for_cpu(dev, ctx->opad_tmp_keys_dma_addr,
+				keylen, DMA_TO_DEVICE);
+
+	memcpy(ctx->opad_tmp_keys_buff, key, keylen);
+	if (keylen == 24) {
+		memset(ctx->opad_tmp_keys_buff + 24, 0,
+		       CC_AES_KEY_SIZE_MAX - 24);
+	}
+
+	dma_sync_single_for_device(dev, ctx->opad_tmp_keys_dma_addr,
+				   keylen, DMA_TO_DEVICE);
+
+	ctx->key_params.keylen = keylen;
+
+	return 0;
+}
+
+static void cc_free_ctx(struct cc_hash_ctx *ctx)
+{
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	if (ctx->digest_buff_dma_addr) {
+		dma_unmap_single(dev, ctx->digest_buff_dma_addr,
+				 sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped digest-buffer: digest_buff_dma_addr=%pad\n",
+			&ctx->digest_buff_dma_addr);
+		ctx->digest_buff_dma_addr = 0;
+	}
+	if (ctx->opad_tmp_keys_dma_addr) {
+		dma_unmap_single(dev, ctx->opad_tmp_keys_dma_addr,
+				 sizeof(ctx->opad_tmp_keys_buff),
+				 DMA_BIDIRECTIONAL);
+		dev_dbg(dev, "Unmapped opad-digest: opad_tmp_keys_dma_addr=%pad\n",
+			&ctx->opad_tmp_keys_dma_addr);
+		ctx->opad_tmp_keys_dma_addr = 0;
+	}
+
+	ctx->key_params.keylen = 0;
+}
+
+static int cc_alloc_ctx(struct cc_hash_ctx *ctx)
+{
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	ctx->key_params.keylen = 0;
+
+	ctx->digest_buff_dma_addr =
+		dma_map_single(dev, (void *)ctx->digest_buff,
+			       sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) {
+		dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n",
+			sizeof(ctx->digest_buff), ctx->digest_buff);
+		goto fail;
+	}
+	dev_dbg(dev, "Mapped digest %zu B at va=%pK to dma=%pad\n",
+		sizeof(ctx->digest_buff), ctx->digest_buff,
+		&ctx->digest_buff_dma_addr);
+
+	ctx->opad_tmp_keys_dma_addr =
+		dma_map_single(dev, (void *)ctx->opad_tmp_keys_buff,
+			       sizeof(ctx->opad_tmp_keys_buff),
+			       DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) {
+		dev_err(dev, "Mapping opad digest %zu B at va=%pK for DMA failed\n",
+			sizeof(ctx->opad_tmp_keys_buff),
+			ctx->opad_tmp_keys_buff);
+		goto fail;
+	}
+	dev_dbg(dev, "Mapped opad_tmp_keys %zu B at va=%pK to dma=%pad\n",
+		sizeof(ctx->opad_tmp_keys_buff), ctx->opad_tmp_keys_buff,
+		&ctx->opad_tmp_keys_dma_addr);
+
+	ctx->is_hmac = false;
+	return 0;
+
+fail:
+	cc_free_ctx(ctx);
+	return -ENOMEM;
+}
+
+static int cc_cra_init(struct crypto_tfm *tfm)
+{
+	struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct hash_alg_common *hash_alg_common =
+		container_of(tfm->__crt_alg, struct hash_alg_common, base);
+	struct ahash_alg *ahash_alg =
+		container_of(hash_alg_common, struct ahash_alg, halg);
+	struct cc_hash_alg *cc_alg =
+			container_of(ahash_alg, struct cc_hash_alg, ahash_alg);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct ahash_req_ctx));
+
+	ctx->hash_mode = cc_alg->hash_mode;
+	ctx->hw_mode = cc_alg->hw_mode;
+	ctx->inter_digestsize = cc_alg->inter_digestsize;
+	ctx->drvdata = cc_alg->drvdata;
+
+	return cc_alloc_ctx(ctx);
+}
+
+static void cc_cra_exit(struct crypto_tfm *tfm)
+{
+	struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	dev_dbg(dev, "cc_cra_exit");
+	cc_free_ctx(ctx);
+}
+
+static int cc_mac_update(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base);
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	int rc;
+	u32 idx = 0;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	if (req->nbytes == 0) {
+		/* no real updates required */
+		return 0;
+	}
+
+	state->xcbc_count++;
+
+	rc = cc_map_hash_request_update(ctx->drvdata, state, req->src,
+					req->nbytes, block_size, flags);
+	if (rc) {
+		if (rc == 1) {
+			dev_dbg(dev, " data size not require HW update %x\n",
+				req->nbytes);
+			/* No hardware updates are required */
+			return 0;
+		}
+		dev_err(dev, "map_ahash_request_update() failed\n");
+		return -ENOMEM;
+	}
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -EINVAL;
+	}
+
+	if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC)
+		cc_setup_xcbc(req, desc, &idx);
+	else
+		cc_setup_cmac(req, desc, &idx);
+
+	cc_set_desc(state, ctx, DIN_AES_DOUT, desc, true, &idx);
+
+	/* store the hash digest result in context */
+	hw_desc_init(&desc[idx]);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	set_dout_dlli(&desc[idx], state->digest_buff_dma_addr,
+		      ctx->inter_digestsize, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_AES_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	idx++;
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_update_complete;
+	cc_req.user_arg = (void *)req;
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_mac_final(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	int idx = 0;
+	int rc = 0;
+	u32 key_size, key_len;
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	gfp_t flags = cc_gfp_flags(&req->base);
+	u32 rem_cnt = *cc_hash_buf_cnt(state);
+
+	if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) {
+		key_size = CC_AES_128_BIT_KEY_SIZE;
+		key_len  = CC_AES_128_BIT_KEY_SIZE;
+	} else {
+		key_size = (ctx->key_params.keylen == 24) ? AES_MAX_KEY_SIZE :
+			ctx->key_params.keylen;
+		key_len =  ctx->key_params.keylen;
+	}
+
+	dev_dbg(dev, "===== final  xcbc reminder (%d) ====\n", rem_cnt);
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -EINVAL;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, req->src,
+				      req->nbytes, 0, flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_hash_complete;
+	cc_req.user_arg = (void *)req;
+
+	if (state->xcbc_count && rem_cnt == 0) {
+		/* Load key for ECB decryption */
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], DRV_CIPHER_ECB);
+		set_cipher_config0(&desc[idx], DRV_CRYPTO_DIRECTION_DECRYPT);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K1_OFFSET),
+			     key_size, NS_BIT);
+		set_key_size_aes(&desc[idx], key_len);
+		set_flow_mode(&desc[idx], S_DIN_to_AES);
+		set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+		idx++;
+
+		/* Initiate decryption of block state to previous
+		 * block_state-XOR-M[n]
+		 */
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+			     CC_AES_BLOCK_SIZE, NS_BIT);
+		set_dout_dlli(&desc[idx], state->digest_buff_dma_addr,
+			      CC_AES_BLOCK_SIZE, NS_BIT, 0);
+		set_flow_mode(&desc[idx], DIN_AES_DOUT);
+		idx++;
+
+		/* Memory Barrier: wait for axi write to complete */
+		hw_desc_init(&desc[idx]);
+		set_din_no_dma(&desc[idx], 0, 0xfffff0);
+		set_dout_no_dma(&desc[idx], 0, 0, 1);
+		idx++;
+	}
+
+	if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC)
+		cc_setup_xcbc(req, desc, &idx);
+	else
+		cc_setup_cmac(req, desc, &idx);
+
+	if (state->xcbc_count == 0) {
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_key_size_aes(&desc[idx], key_len);
+		set_cmac_size0_mode(&desc[idx]);
+		set_flow_mode(&desc[idx], S_DIN_to_AES);
+		idx++;
+	} else if (rem_cnt > 0) {
+		cc_set_desc(state, ctx, DIN_AES_DOUT, desc, false, &idx);
+	} else {
+		hw_desc_init(&desc[idx]);
+		set_din_const(&desc[idx], 0x00, CC_AES_BLOCK_SIZE);
+		set_flow_mode(&desc[idx], DIN_AES_DOUT);
+		idx++;
+	}
+
+	/* Get final MAC result */
+	hw_desc_init(&desc[idx]);
+	/* TODO */
+	set_dout_dlli(&desc[idx], state->digest_result_dma_addr,
+		      digestsize, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_AES_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	idx++;
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_result(dev, state, digestsize, req->result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_mac_finup(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	int idx = 0;
+	int rc = 0;
+	u32 key_len = 0;
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== finup xcbc(%d) ====\n", req->nbytes);
+	if (state->xcbc_count > 0 && req->nbytes == 0) {
+		dev_dbg(dev, "No data to update. Call to fdx_mac_final\n");
+		return cc_mac_final(req);
+	}
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -EINVAL;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, req->src,
+				      req->nbytes, 1, flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_hash_complete;
+	cc_req.user_arg = (void *)req;
+
+	if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) {
+		key_len = CC_AES_128_BIT_KEY_SIZE;
+		cc_setup_xcbc(req, desc, &idx);
+	} else {
+		key_len = ctx->key_params.keylen;
+		cc_setup_cmac(req, desc, &idx);
+	}
+
+	if (req->nbytes == 0) {
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_key_size_aes(&desc[idx], key_len);
+		set_cmac_size0_mode(&desc[idx]);
+		set_flow_mode(&desc[idx], S_DIN_to_AES);
+		idx++;
+	} else {
+		cc_set_desc(state, ctx, DIN_AES_DOUT, desc, false, &idx);
+	}
+
+	/* Get final MAC result */
+	hw_desc_init(&desc[idx]);
+	/* TODO */
+	set_dout_dlli(&desc[idx], state->digest_result_dma_addr,
+		      digestsize, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_AES_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	idx++;
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_result(dev, state, digestsize, req->result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_mac_digest(struct ahash_request *req)
+{
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	u32 digestsize = crypto_ahash_digestsize(tfm);
+	struct cc_crypto_req cc_req = {};
+	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
+	u32 key_len;
+	unsigned int idx = 0;
+	int rc;
+	gfp_t flags = cc_gfp_flags(&req->base);
+
+	dev_dbg(dev, "===== -digest mac (%d) ====\n",  req->nbytes);
+
+	cc_init_req(dev, state, ctx);
+
+	if (cc_map_req(dev, state, ctx)) {
+		dev_err(dev, "map_ahash_source() failed\n");
+		return -ENOMEM;
+	}
+	if (cc_map_result(dev, state, digestsize)) {
+		dev_err(dev, "map_ahash_digest() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	if (cc_map_hash_request_final(ctx->drvdata, state, req->src,
+				      req->nbytes, 1, flags)) {
+		dev_err(dev, "map_ahash_request_final() failed\n");
+		cc_unmap_req(dev, state, ctx);
+		return -ENOMEM;
+	}
+
+	/* Setup request structure */
+	cc_req.user_cb = (void *)cc_digest_complete;
+	cc_req.user_arg = (void *)req;
+
+	if (ctx->hw_mode == DRV_CIPHER_XCBC_MAC) {
+		key_len = CC_AES_128_BIT_KEY_SIZE;
+		cc_setup_xcbc(req, desc, &idx);
+	} else {
+		key_len = ctx->key_params.keylen;
+		cc_setup_cmac(req, desc, &idx);
+	}
+
+	if (req->nbytes == 0) {
+		hw_desc_init(&desc[idx]);
+		set_cipher_mode(&desc[idx], ctx->hw_mode);
+		set_key_size_aes(&desc[idx], key_len);
+		set_cmac_size0_mode(&desc[idx]);
+		set_flow_mode(&desc[idx], S_DIN_to_AES);
+		idx++;
+	} else {
+		cc_set_desc(state, ctx, DIN_AES_DOUT, desc, false, &idx);
+	}
+
+	/* Get final MAC result */
+	hw_desc_init(&desc[idx]);
+	set_dout_dlli(&desc[idx], state->digest_result_dma_addr,
+		      CC_AES_BLOCK_SIZE, NS_BIT, 1);
+	set_queue_last_ind(ctx->drvdata, &desc[idx]);
+	set_flow_mode(&desc[idx], S_AES_to_DOUT);
+	set_setup_mode(&desc[idx], SETUP_WRITE_STATE0);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_cipher_mode(&desc[idx], ctx->hw_mode);
+	idx++;
+
+	rc = cc_send_request(ctx->drvdata, &cc_req, desc, idx, &req->base);
+	if (rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_err(dev, "send_request() failed (rc=%d)\n", rc);
+		cc_unmap_hash_request(dev, state, req->src, true);
+		cc_unmap_result(dev, state, digestsize, req->result);
+		cc_unmap_req(dev, state, ctx);
+	}
+	return rc;
+}
+
+static int cc_hash_export(struct ahash_request *req, void *out)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	u8 *curr_buff = cc_hash_buf(state);
+	u32 curr_buff_cnt = *cc_hash_buf_cnt(state);
+	const u32 tmp = CC_EXPORT_MAGIC;
+
+	memcpy(out, &tmp, sizeof(u32));
+	out += sizeof(u32);
+
+	memcpy(out, state->digest_buff, ctx->inter_digestsize);
+	out += ctx->inter_digestsize;
+
+	memcpy(out, state->digest_bytes_len, ctx->drvdata->hash_len_sz);
+	out += ctx->drvdata->hash_len_sz;
+
+	memcpy(out, &curr_buff_cnt, sizeof(u32));
+	out += sizeof(u32);
+
+	memcpy(out, curr_buff, curr_buff_cnt);
+
+	return 0;
+}
+
+static int cc_hash_import(struct ahash_request *req, const void *in)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	u32 tmp;
+
+	memcpy(&tmp, in, sizeof(u32));
+	if (tmp != CC_EXPORT_MAGIC)
+		return -EINVAL;
+	in += sizeof(u32);
+
+	cc_init_req(dev, state, ctx);
+
+	memcpy(state->digest_buff, in, ctx->inter_digestsize);
+	in += ctx->inter_digestsize;
+
+	memcpy(state->digest_bytes_len, in, ctx->drvdata->hash_len_sz);
+	in += ctx->drvdata->hash_len_sz;
+
+	/* Sanity check the data as much as possible */
+	memcpy(&tmp, in, sizeof(u32));
+	if (tmp > CC_MAX_HASH_BLCK_SIZE)
+		return -EINVAL;
+	in += sizeof(u32);
+
+	state->buf_cnt[0] = tmp;
+	memcpy(state->buffers[0], in, tmp);
+
+	return 0;
+}
+
+struct cc_hash_template {
+	char name[CRYPTO_MAX_ALG_NAME];
+	char driver_name[CRYPTO_MAX_ALG_NAME];
+	char mac_name[CRYPTO_MAX_ALG_NAME];
+	char mac_driver_name[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	bool synchronize;
+	struct ahash_alg template_ahash;
+	int hash_mode;
+	int hw_mode;
+	int inter_digestsize;
+	struct cc_drvdata *drvdata;
+	u32 min_hw_rev;
+};
+
+#define CC_STATE_SIZE(_x) \
+	((_x) + HASH_MAX_LEN_SIZE + CC_MAX_HASH_BLCK_SIZE + (2 * sizeof(u32)))
+
+/* hash descriptors */
+static struct cc_hash_template driver_hash[] = {
+	//Asynchronize hash template
+	{
+		.name = "sha1",
+		.driver_name = "sha1-ccree",
+		.mac_name = "hmac(sha1)",
+		.mac_driver_name = "hmac-sha1-ccree",
+		.blocksize = SHA1_BLOCK_SIZE,
+		.synchronize = false,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = SHA1_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(SHA1_DIGEST_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_SHA1,
+		.hw_mode = DRV_HASH_HW_SHA1,
+		.inter_digestsize = SHA1_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "sha256",
+		.driver_name = "sha256-ccree",
+		.mac_name = "hmac(sha256)",
+		.mac_driver_name = "hmac-sha256-ccree",
+		.blocksize = SHA256_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = SHA256_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(SHA256_DIGEST_SIZE)
+			},
+		},
+		.hash_mode = DRV_HASH_SHA256,
+		.hw_mode = DRV_HASH_HW_SHA256,
+		.inter_digestsize = SHA256_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "sha224",
+		.driver_name = "sha224-ccree",
+		.mac_name = "hmac(sha224)",
+		.mac_driver_name = "hmac-sha224-ccree",
+		.blocksize = SHA224_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = SHA224_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(SHA224_DIGEST_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_SHA224,
+		.hw_mode = DRV_HASH_HW_SHA256,
+		.inter_digestsize = SHA256_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "sha384",
+		.driver_name = "sha384-ccree",
+		.mac_name = "hmac(sha384)",
+		.mac_driver_name = "hmac-sha384-ccree",
+		.blocksize = SHA384_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = SHA384_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(SHA384_DIGEST_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_SHA384,
+		.hw_mode = DRV_HASH_HW_SHA512,
+		.inter_digestsize = SHA512_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "sha512",
+		.driver_name = "sha512-ccree",
+		.mac_name = "hmac(sha512)",
+		.mac_driver_name = "hmac-sha512-ccree",
+		.blocksize = SHA512_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = SHA512_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(SHA512_DIGEST_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_SHA512,
+		.hw_mode = DRV_HASH_HW_SHA512,
+		.inter_digestsize = SHA512_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "md5",
+		.driver_name = "md5-ccree",
+		.mac_name = "hmac(md5)",
+		.mac_driver_name = "hmac-md5-ccree",
+		.blocksize = MD5_HMAC_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_hash_update,
+			.final = cc_hash_final,
+			.finup = cc_hash_finup,
+			.digest = cc_hash_digest,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.setkey = cc_hash_setkey,
+			.halg = {
+				.digestsize = MD5_DIGEST_SIZE,
+				.statesize = CC_STATE_SIZE(MD5_DIGEST_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_MD5,
+		.hw_mode = DRV_HASH_HW_MD5,
+		.inter_digestsize = MD5_DIGEST_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.mac_name = "xcbc(aes)",
+		.mac_driver_name = "xcbc-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_mac_update,
+			.final = cc_mac_final,
+			.finup = cc_mac_finup,
+			.digest = cc_mac_digest,
+			.setkey = cc_xcbc_setkey,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.halg = {
+				.digestsize = AES_BLOCK_SIZE,
+				.statesize = CC_STATE_SIZE(AES_BLOCK_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_NULL,
+		.hw_mode = DRV_CIPHER_XCBC_MAC,
+		.inter_digestsize = AES_BLOCK_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.mac_name = "cmac(aes)",
+		.mac_driver_name = "cmac-aes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_ahash = {
+			.init = cc_hash_init,
+			.update = cc_mac_update,
+			.final = cc_mac_final,
+			.finup = cc_mac_finup,
+			.digest = cc_mac_digest,
+			.setkey = cc_cmac_setkey,
+			.export = cc_hash_export,
+			.import = cc_hash_import,
+			.halg = {
+				.digestsize = AES_BLOCK_SIZE,
+				.statesize = CC_STATE_SIZE(AES_BLOCK_SIZE),
+			},
+		},
+		.hash_mode = DRV_HASH_NULL,
+		.hw_mode = DRV_CIPHER_CMAC,
+		.inter_digestsize = AES_BLOCK_SIZE,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+};
+
+static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template,
+					     struct device *dev, bool keyed)
+{
+	struct cc_hash_alg *t_crypto_alg;
+	struct crypto_alg *alg;
+	struct ahash_alg *halg;
+
+	t_crypto_alg = kzalloc(sizeof(*t_crypto_alg), GFP_KERNEL);
+	if (!t_crypto_alg)
+		return ERR_PTR(-ENOMEM);
+
+	t_crypto_alg->ahash_alg = template->template_ahash;
+	halg = &t_crypto_alg->ahash_alg;
+	alg = &halg->halg.base;
+
+	if (keyed) {
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->mac_name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->mac_driver_name);
+	} else {
+		halg->setkey = NULL;
+		snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->name);
+		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			 template->driver_name);
+	}
+	alg->cra_module = THIS_MODULE;
+	alg->cra_ctxsize = sizeof(struct cc_hash_ctx);
+	alg->cra_priority = CC_CRA_PRIO;
+	alg->cra_blocksize = template->blocksize;
+	alg->cra_alignmask = 0;
+	alg->cra_exit = cc_cra_exit;
+
+	alg->cra_init = cc_cra_init;
+	alg->cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_TYPE_AHASH |
+			CRYPTO_ALG_KERN_DRIVER_ONLY;
+	alg->cra_type = &crypto_ahash_type;
+
+	t_crypto_alg->hash_mode = template->hash_mode;
+	t_crypto_alg->hw_mode = template->hw_mode;
+	t_crypto_alg->inter_digestsize = template->inter_digestsize;
+
+	return t_crypto_alg;
+}
+
+int cc_init_hash_sram(struct cc_drvdata *drvdata)
+{
+	struct cc_hash_handle *hash_handle = drvdata->hash_handle;
+	cc_sram_addr_t sram_buff_ofs = hash_handle->digest_len_sram_addr;
+	unsigned int larval_seq_len = 0;
+	struct cc_hw_desc larval_seq[CC_DIGEST_SIZE_MAX / sizeof(u32)];
+	bool large_sha_supported = (drvdata->hw_rev >= CC_HW_REV_712);
+	int rc = 0;
+
+	/* Copy-to-sram digest-len */
+	cc_set_sram_desc(digest_len_init, sram_buff_ofs,
+			 ARRAY_SIZE(digest_len_init), larval_seq,
+			 &larval_seq_len);
+	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+	if (rc)
+		goto init_digest_const_err;
+
+	sram_buff_ofs += sizeof(digest_len_init);
+	larval_seq_len = 0;
+
+	if (large_sha_supported) {
+		/* Copy-to-sram digest-len for sha384/512 */
+		cc_set_sram_desc(digest_len_sha512_init, sram_buff_ofs,
+				 ARRAY_SIZE(digest_len_sha512_init),
+				 larval_seq, &larval_seq_len);
+		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+		if (rc)
+			goto init_digest_const_err;
+
+		sram_buff_ofs += sizeof(digest_len_sha512_init);
+		larval_seq_len = 0;
+	}
+
+	/* The initial digests offset */
+	hash_handle->larval_digest_sram_addr = sram_buff_ofs;
+
+	/* Copy-to-sram initial SHA* digests */
+	cc_set_sram_desc(md5_init, sram_buff_ofs, ARRAY_SIZE(md5_init),
+			 larval_seq, &larval_seq_len);
+	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+	if (rc)
+		goto init_digest_const_err;
+	sram_buff_ofs += sizeof(md5_init);
+	larval_seq_len = 0;
+
+	cc_set_sram_desc(sha1_init, sram_buff_ofs,
+			 ARRAY_SIZE(sha1_init), larval_seq,
+			 &larval_seq_len);
+	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+	if (rc)
+		goto init_digest_const_err;
+	sram_buff_ofs += sizeof(sha1_init);
+	larval_seq_len = 0;
+
+	cc_set_sram_desc(sha224_init, sram_buff_ofs,
+			 ARRAY_SIZE(sha224_init), larval_seq,
+			 &larval_seq_len);
+	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+	if (rc)
+		goto init_digest_const_err;
+	sram_buff_ofs += sizeof(sha224_init);
+	larval_seq_len = 0;
+
+	cc_set_sram_desc(sha256_init, sram_buff_ofs,
+			 ARRAY_SIZE(sha256_init), larval_seq,
+			 &larval_seq_len);
+	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+	if (rc)
+		goto init_digest_const_err;
+	sram_buff_ofs += sizeof(sha256_init);
+	larval_seq_len = 0;
+
+	if (large_sha_supported) {
+		cc_set_sram_desc((u32 *)sha384_init, sram_buff_ofs,
+				 (ARRAY_SIZE(sha384_init) * 2), larval_seq,
+				 &larval_seq_len);
+		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+		if (rc)
+			goto init_digest_const_err;
+		sram_buff_ofs += sizeof(sha384_init);
+		larval_seq_len = 0;
+
+		cc_set_sram_desc((u32 *)sha512_init, sram_buff_ofs,
+				 (ARRAY_SIZE(sha512_init) * 2), larval_seq,
+				 &larval_seq_len);
+		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
+		if (rc)
+			goto init_digest_const_err;
+	}
+
+init_digest_const_err:
+	return rc;
+}
+
+static void __init cc_swap_dwords(u32 *buf, unsigned long size)
+{
+	int i;
+	u32 tmp;
+
+	for (i = 0; i < size; i += 2) {
+		tmp = buf[i];
+		buf[i] = buf[i + 1];
+		buf[i + 1] = tmp;
+	}
+}
+
+/*
+ * Due to the way the HW works we need to swap every
+ * double word in the SHA384 and SHA512 larval hashes
+ */
+void __init cc_hash_global_init(void)
+{
+	cc_swap_dwords((u32 *)&sha384_init, (ARRAY_SIZE(sha384_init) * 2));
+	cc_swap_dwords((u32 *)&sha512_init, (ARRAY_SIZE(sha512_init) * 2));
+}
+
+int cc_hash_alloc(struct cc_drvdata *drvdata)
+{
+	struct cc_hash_handle *hash_handle;
+	cc_sram_addr_t sram_buff;
+	u32 sram_size_to_alloc;
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc = 0;
+	int alg;
+
+	hash_handle = kzalloc(sizeof(*hash_handle), GFP_KERNEL);
+	if (!hash_handle)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&hash_handle->hash_list);
+	drvdata->hash_handle = hash_handle;
+
+	sram_size_to_alloc = sizeof(digest_len_init) +
+			sizeof(md5_init) +
+			sizeof(sha1_init) +
+			sizeof(sha224_init) +
+			sizeof(sha256_init);
+
+	if (drvdata->hw_rev >= CC_HW_REV_712)
+		sram_size_to_alloc += sizeof(digest_len_sha512_init) +
+			sizeof(sha384_init) + sizeof(sha512_init);
+
+	sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc);
+	if (sram_buff == NULL_SRAM_ADDR) {
+		dev_err(dev, "SRAM pool exhausted\n");
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	/* The initial digest-len offset */
+	hash_handle->digest_len_sram_addr = sram_buff;
+
+	/*must be set before the alg registration as it is being used there*/
+	rc = cc_init_hash_sram(drvdata);
+	if (rc) {
+		dev_err(dev, "Init digest CONST failed (rc=%d)\n", rc);
+		goto fail;
+	}
+
+	/* ahash registration */
+	for (alg = 0; alg < ARRAY_SIZE(driver_hash); alg++) {
+		struct cc_hash_alg *t_alg;
+		int hw_mode = driver_hash[alg].hw_mode;
+
+		/* We either support both HASH and MAC or none */
+		if (driver_hash[alg].min_hw_rev > drvdata->hw_rev)
+			continue;
+
+		/* register hmac version */
+		t_alg = cc_alloc_hash_alg(&driver_hash[alg], dev, true);
+		if (IS_ERR(t_alg)) {
+			rc = PTR_ERR(t_alg);
+			dev_err(dev, "%s alg allocation failed\n",
+				driver_hash[alg].driver_name);
+			goto fail;
+		}
+		t_alg->drvdata = drvdata;
+
+		rc = crypto_register_ahash(&t_alg->ahash_alg);
+		if (rc) {
+			dev_err(dev, "%s alg registration failed\n",
+				driver_hash[alg].driver_name);
+			kfree(t_alg);
+			goto fail;
+		} else {
+			list_add_tail(&t_alg->entry, &hash_handle->hash_list);
+		}
+
+		if (hw_mode == DRV_CIPHER_XCBC_MAC ||
+		    hw_mode == DRV_CIPHER_CMAC)
+			continue;
+
+		/* register hash version */
+		t_alg = cc_alloc_hash_alg(&driver_hash[alg], dev, false);
+		if (IS_ERR(t_alg)) {
+			rc = PTR_ERR(t_alg);
+			dev_err(dev, "%s alg allocation failed\n",
+				driver_hash[alg].driver_name);
+			goto fail;
+		}
+		t_alg->drvdata = drvdata;
+
+		rc = crypto_register_ahash(&t_alg->ahash_alg);
+		if (rc) {
+			dev_err(dev, "%s alg registration failed\n",
+				driver_hash[alg].driver_name);
+			kfree(t_alg);
+			goto fail;
+		} else {
+			list_add_tail(&t_alg->entry, &hash_handle->hash_list);
+		}
+	}
+
+	return 0;
+
+fail:
+	kfree(drvdata->hash_handle);
+	drvdata->hash_handle = NULL;
+	return rc;
+}
+
+int cc_hash_free(struct cc_drvdata *drvdata)
+{
+	struct cc_hash_alg *t_hash_alg, *hash_n;
+	struct cc_hash_handle *hash_handle = drvdata->hash_handle;
+
+	if (hash_handle) {
+		list_for_each_entry_safe(t_hash_alg, hash_n,
+					 &hash_handle->hash_list, entry) {
+			crypto_unregister_ahash(&t_hash_alg->ahash_alg);
+			list_del(&t_hash_alg->entry);
+			kfree(t_hash_alg);
+		}
+
+		kfree(hash_handle);
+		drvdata->hash_handle = NULL;
+	}
+	return 0;
+}
+
+static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[],
+			  unsigned int *seq_size)
+{
+	unsigned int idx = *seq_size;
+	struct ahash_req_ctx *state = ahash_request_ctx(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	/* Setup XCBC MAC K1 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, (ctx->opad_tmp_keys_dma_addr +
+					    XCBC_MAC_K1_OFFSET),
+		     CC_AES_128_BIT_KEY_SIZE, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* Setup XCBC MAC K2 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K2_OFFSET),
+		     CC_AES_128_BIT_KEY_SIZE, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE1);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* Setup XCBC MAC K3 */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI,
+		     (ctx->opad_tmp_keys_dma_addr + XCBC_MAC_K3_OFFSET),
+		     CC_AES_128_BIT_KEY_SIZE, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE2);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* Loading MAC state */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+		     CC_AES_BLOCK_SIZE, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_XCBC_MAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+	*seq_size = idx;
+}
+
+static void cc_setup_cmac(struct ahash_request *areq, struct cc_hw_desc desc[],
+			  unsigned int *seq_size)
+{
+	unsigned int idx = *seq_size;
+	struct ahash_req_ctx *state = ahash_request_ctx(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	/* Setup CMAC Key */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, ctx->opad_tmp_keys_dma_addr,
+		     ((ctx->key_params.keylen == 24) ? AES_MAX_KEY_SIZE :
+		      ctx->key_params.keylen), NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_KEY0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CMAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], ctx->key_params.keylen);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+
+	/* Load MAC state */
+	hw_desc_init(&desc[idx]);
+	set_din_type(&desc[idx], DMA_DLLI, state->digest_buff_dma_addr,
+		     CC_AES_BLOCK_SIZE, NS_BIT);
+	set_setup_mode(&desc[idx], SETUP_LOAD_STATE0);
+	set_cipher_mode(&desc[idx], DRV_CIPHER_CMAC);
+	set_cipher_config0(&desc[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_key_size_aes(&desc[idx], ctx->key_params.keylen);
+	set_flow_mode(&desc[idx], S_DIN_to_AES);
+	idx++;
+	*seq_size = idx;
+}
+
+static void cc_set_desc(struct ahash_req_ctx *areq_ctx,
+			struct cc_hash_ctx *ctx, unsigned int flow_mode,
+			struct cc_hw_desc desc[], bool is_not_last_data,
+			unsigned int *seq_size)
+{
+	unsigned int idx = *seq_size;
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
+
+	if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_DLLI) {
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     sg_dma_address(areq_ctx->curr_sg),
+			     areq_ctx->curr_sg->length, NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		idx++;
+	} else {
+		if (areq_ctx->data_dma_buf_type == CC_DMA_BUF_NULL) {
+			dev_dbg(dev, " NULL mode\n");
+			/* nothing to build */
+			return;
+		}
+		/* bypass */
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_DLLI,
+			     areq_ctx->mlli_params.mlli_dma_addr,
+			     areq_ctx->mlli_params.mlli_len, NS_BIT);
+		set_dout_sram(&desc[idx], ctx->drvdata->mlli_sram_addr,
+			      areq_ctx->mlli_params.mlli_len);
+		set_flow_mode(&desc[idx], BYPASS);
+		idx++;
+		/* process */
+		hw_desc_init(&desc[idx]);
+		set_din_type(&desc[idx], DMA_MLLI,
+			     ctx->drvdata->mlli_sram_addr,
+			     areq_ctx->mlli_nents, NS_BIT);
+		set_flow_mode(&desc[idx], flow_mode);
+		idx++;
+	}
+	if (is_not_last_data)
+		set_din_not_last_indication(&desc[(idx - 1)]);
+	/* return updated desc sequence size */
+	*seq_size = idx;
+}
+
+static const void *cc_larval_digest(struct device *dev, u32 mode)
+{
+	switch (mode) {
+	case DRV_HASH_MD5:
+		return md5_init;
+	case DRV_HASH_SHA1:
+		return sha1_init;
+	case DRV_HASH_SHA224:
+		return sha224_init;
+	case DRV_HASH_SHA256:
+		return sha256_init;
+	case DRV_HASH_SHA384:
+		return sha384_init;
+	case DRV_HASH_SHA512:
+		return sha512_init;
+	default:
+		dev_err(dev, "Invalid hash mode (%d)\n", mode);
+		return md5_init;
+	}
+}
+
+/*!
+ * Gets the address of the initial digest in SRAM
+ * according to the given hash mode
+ *
+ * \param drvdata
+ * \param mode The Hash mode. Supported modes: MD5/SHA1/SHA224/SHA256
+ *
+ * \return u32 The address of the initial digest in SRAM
+ */
+cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode)
+{
+	struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata;
+	struct cc_hash_handle *hash_handle = _drvdata->hash_handle;
+	struct device *dev = drvdata_to_dev(_drvdata);
+
+	switch (mode) {
+	case DRV_HASH_NULL:
+		break; /*Ignore*/
+	case DRV_HASH_MD5:
+		return (hash_handle->larval_digest_sram_addr);
+	case DRV_HASH_SHA1:
+		return (hash_handle->larval_digest_sram_addr +
+			sizeof(md5_init));
+	case DRV_HASH_SHA224:
+		return (hash_handle->larval_digest_sram_addr +
+			sizeof(md5_init) +
+			sizeof(sha1_init));
+	case DRV_HASH_SHA256:
+		return (hash_handle->larval_digest_sram_addr +
+			sizeof(md5_init) +
+			sizeof(sha1_init) +
+			sizeof(sha224_init));
+	case DRV_HASH_SHA384:
+		return (hash_handle->larval_digest_sram_addr +
+			sizeof(md5_init) +
+			sizeof(sha1_init) +
+			sizeof(sha224_init) +
+			sizeof(sha256_init));
+	case DRV_HASH_SHA512:
+		return (hash_handle->larval_digest_sram_addr +
+			sizeof(md5_init) +
+			sizeof(sha1_init) +
+			sizeof(sha224_init) +
+			sizeof(sha256_init) +
+			sizeof(sha384_init));
+	default:
+		dev_err(dev, "Invalid hash mode (%d)\n", mode);
+	}
+
+	/*This is valid wrong value to avoid kernel crash*/
+	return hash_handle->larval_digest_sram_addr;
+}
+
+cc_sram_addr_t
+cc_digest_len_addr(void *drvdata, u32 mode)
+{
+	struct cc_drvdata *_drvdata = (struct cc_drvdata *)drvdata;
+	struct cc_hash_handle *hash_handle = _drvdata->hash_handle;
+	cc_sram_addr_t digest_len_addr = hash_handle->digest_len_sram_addr;
+
+	switch (mode) {
+	case DRV_HASH_SHA1:
+	case DRV_HASH_SHA224:
+	case DRV_HASH_SHA256:
+	case DRV_HASH_MD5:
+		return digest_len_addr;
+#if (CC_DEV_SHA_MAX > 256)
+	case DRV_HASH_SHA384:
+	case DRV_HASH_SHA512:
+		return  digest_len_addr + sizeof(digest_len_init);
+#endif
+	default:
+		return digest_len_addr; /*to avoid kernel crash*/
+	}
+}
diff --git a/drivers/crypto/ccree/cc_hash.h b/drivers/crypto/ccree/cc_hash.h
new file mode 100644
index 000000000000..2e5bf8b0bbb6
--- /dev/null
+++ b/drivers/crypto/ccree/cc_hash.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_hash.h
+ * ARM CryptoCell Hash Crypto API
+ */
+
+#ifndef __CC_HASH_H__
+#define __CC_HASH_H__
+
+#include "cc_buffer_mgr.h"
+
+#define HMAC_IPAD_CONST	0x36363636
+#define HMAC_OPAD_CONST	0x5C5C5C5C
+#define HASH_LEN_SIZE_712 16
+#define HASH_LEN_SIZE_630 8
+#define HASH_MAX_LEN_SIZE HASH_LEN_SIZE_712
+#define CC_MAX_HASH_DIGEST_SIZE	SHA512_DIGEST_SIZE
+#define CC_MAX_HASH_BLCK_SIZE SHA512_BLOCK_SIZE
+
+#define XCBC_MAC_K1_OFFSET 0
+#define XCBC_MAC_K2_OFFSET 16
+#define XCBC_MAC_K3_OFFSET 32
+
+#define CC_EXPORT_MAGIC 0xC2EE1070U
+
+/* this struct was taken from drivers/crypto/nx/nx-aes-xcbc.c and it is used
+ * for xcbc/cmac statesize
+ */
+struct aeshash_state {
+	u8 state[AES_BLOCK_SIZE];
+	unsigned int count;
+	u8 buffer[AES_BLOCK_SIZE];
+};
+
+/* ahash state */
+struct ahash_req_ctx {
+	u8 buffers[2][CC_MAX_HASH_BLCK_SIZE] ____cacheline_aligned;
+	u8 digest_result_buff[CC_MAX_HASH_DIGEST_SIZE] ____cacheline_aligned;
+	u8 digest_buff[CC_MAX_HASH_DIGEST_SIZE] ____cacheline_aligned;
+	u8 opad_digest_buff[CC_MAX_HASH_DIGEST_SIZE] ____cacheline_aligned;
+	u8 digest_bytes_len[HASH_MAX_LEN_SIZE] ____cacheline_aligned;
+	struct async_gen_req_ctx gen_ctx ____cacheline_aligned;
+	enum cc_req_dma_buf_type data_dma_buf_type;
+	dma_addr_t opad_digest_dma_addr;
+	dma_addr_t digest_buff_dma_addr;
+	dma_addr_t digest_bytes_len_dma_addr;
+	dma_addr_t digest_result_dma_addr;
+	u32 buf_cnt[2];
+	u32 buff_index;
+	u32 xcbc_count; /* count xcbc update operatations */
+	struct scatterlist buff_sg[2];
+	struct scatterlist *curr_sg;
+	u32 in_nents;
+	u32 mlli_nents;
+	struct mlli_params mlli_params;
+};
+
+static inline u32 *cc_hash_buf_cnt(struct ahash_req_ctx *state)
+{
+	return &state->buf_cnt[state->buff_index];
+}
+
+static inline u8 *cc_hash_buf(struct ahash_req_ctx *state)
+{
+	return state->buffers[state->buff_index];
+}
+
+static inline u32 *cc_next_buf_cnt(struct ahash_req_ctx *state)
+{
+	return &state->buf_cnt[state->buff_index ^ 1];
+}
+
+static inline u8 *cc_next_buf(struct ahash_req_ctx *state)
+{
+	return state->buffers[state->buff_index ^ 1];
+}
+
+int cc_hash_alloc(struct cc_drvdata *drvdata);
+int cc_init_hash_sram(struct cc_drvdata *drvdata);
+int cc_hash_free(struct cc_drvdata *drvdata);
+
+/*!
+ * Gets the initial digest length
+ *
+ * \param drvdata
+ * \param mode The Hash mode. Supported modes:
+ *             MD5/SHA1/SHA224/SHA256/SHA384/SHA512
+ *
+ * \return u32 returns the address of the initial digest length in SRAM
+ */
+cc_sram_addr_t
+cc_digest_len_addr(void *drvdata, u32 mode);
+
+/*!
+ * Gets the address of the initial digest in SRAM
+ * according to the given hash mode
+ *
+ * \param drvdata
+ * \param mode The Hash mode. Supported modes:
+ *             MD5/SHA1/SHA224/SHA256/SHA384/SHA512
+ *
+ * \return u32 The address of the initial digest in SRAM
+ */
+cc_sram_addr_t cc_larval_digest_addr(void *drvdata, u32 mode);
+
+void cc_hash_global_init(void);
+
+#endif /*__CC_HASH_H__*/
diff --git a/drivers/crypto/ccree/cc_host_regs.h b/drivers/crypto/ccree/cc_host_regs.h
new file mode 100644
index 000000000000..f51001898ca1
--- /dev/null
+++ b/drivers/crypto/ccree/cc_host_regs.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_HOST_H__
+#define __CC_HOST_H__
+
+// --------------------------------------
+// BLOCK: HOST_P
+// --------------------------------------
+#define CC_HOST_IRR_REG_OFFSET	0xA00UL
+#define CC_HOST_IRR_DSCRPTR_COMPLETION_LOW_INT_BIT_SHIFT	0x2UL
+#define CC_HOST_IRR_DSCRPTR_COMPLETION_LOW_INT_BIT_SIZE	0x1UL
+#define CC_HOST_IRR_AXI_ERR_INT_BIT_SHIFT	0x8UL
+#define CC_HOST_IRR_AXI_ERR_INT_BIT_SIZE	0x1UL
+#define CC_HOST_IRR_GPR0_BIT_SHIFT	0xBUL
+#define CC_HOST_IRR_GPR0_BIT_SIZE	0x1UL
+#define CC_HOST_IRR_DSCRPTR_WATERMARK_INT_BIT_SHIFT	0x13UL
+#define CC_HOST_IRR_DSCRPTR_WATERMARK_INT_BIT_SIZE	0x1UL
+#define CC_HOST_IRR_AXIM_COMP_INT_BIT_SHIFT	0x17UL
+#define CC_HOST_IRR_AXIM_COMP_INT_BIT_SIZE	0x1UL
+#define CC_HOST_SEP_SRAM_THRESHOLD_REG_OFFSET	0xA10UL
+#define CC_HOST_SEP_SRAM_THRESHOLD_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_SEP_SRAM_THRESHOLD_VALUE_BIT_SIZE	0xCUL
+#define CC_HOST_IMR_REG_OFFSET	0xA04UL
+#define CC_HOST_IMR_NOT_USED_MASK_BIT_SHIFT	0x1UL
+#define CC_HOST_IMR_NOT_USED_MASK_BIT_SIZE	0x1UL
+#define CC_HOST_IMR_DSCRPTR_COMPLETION_MASK_BIT_SHIFT	0x2UL
+#define CC_HOST_IMR_DSCRPTR_COMPLETION_MASK_BIT_SIZE	0x1UL
+#define CC_HOST_IMR_AXI_ERR_MASK_BIT_SHIFT	0x8UL
+#define CC_HOST_IMR_AXI_ERR_MASK_BIT_SIZE	0x1UL
+#define CC_HOST_IMR_GPR0_BIT_SHIFT	0xBUL
+#define CC_HOST_IMR_GPR0_BIT_SIZE	0x1UL
+#define CC_HOST_IMR_DSCRPTR_WATERMARK_MASK0_BIT_SHIFT	0x13UL
+#define CC_HOST_IMR_DSCRPTR_WATERMARK_MASK0_BIT_SIZE	0x1UL
+#define CC_HOST_IMR_AXIM_COMP_INT_MASK_BIT_SHIFT	0x17UL
+#define CC_HOST_IMR_AXIM_COMP_INT_MASK_BIT_SIZE	0x1UL
+#define CC_HOST_ICR_REG_OFFSET	0xA08UL
+#define CC_HOST_ICR_DSCRPTR_COMPLETION_BIT_SHIFT	0x2UL
+#define CC_HOST_ICR_DSCRPTR_COMPLETION_BIT_SIZE	0x1UL
+#define CC_HOST_ICR_AXI_ERR_CLEAR_BIT_SHIFT	0x8UL
+#define CC_HOST_ICR_AXI_ERR_CLEAR_BIT_SIZE	0x1UL
+#define CC_HOST_ICR_GPR_INT_CLEAR_BIT_SHIFT	0xBUL
+#define CC_HOST_ICR_GPR_INT_CLEAR_BIT_SIZE	0x1UL
+#define CC_HOST_ICR_DSCRPTR_WATERMARK_QUEUE0_CLEAR_BIT_SHIFT	0x13UL
+#define CC_HOST_ICR_DSCRPTR_WATERMARK_QUEUE0_CLEAR_BIT_SIZE	0x1UL
+#define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SHIFT	0x17UL
+#define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SIZE	0x1UL
+#define CC_HOST_SIGNATURE_REG_OFFSET	0xA24UL
+#define CC_HOST_SIGNATURE_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_SIGNATURE_VALUE_BIT_SIZE	0x20UL
+#define CC_HOST_BOOT_REG_OFFSET	0xA28UL
+#define CC_HOST_BOOT_SYNTHESIS_CONFIG_BIT_SHIFT	0x0UL
+#define CC_HOST_BOOT_SYNTHESIS_CONFIG_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_LARGE_RKEK_LOCAL_BIT_SHIFT	0x1UL
+#define CC_HOST_BOOT_LARGE_RKEK_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_HASH_IN_FUSES_LOCAL_BIT_SHIFT	0x2UL
+#define CC_HOST_BOOT_HASH_IN_FUSES_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_EXT_MEM_SECURED_LOCAL_BIT_SHIFT	0x3UL
+#define CC_HOST_BOOT_EXT_MEM_SECURED_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_RKEK_ECC_EXISTS_LOCAL_N_BIT_SHIFT	0x5UL
+#define CC_HOST_BOOT_RKEK_ECC_EXISTS_LOCAL_N_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_SRAM_SIZE_LOCAL_BIT_SHIFT	0x6UL
+#define CC_HOST_BOOT_SRAM_SIZE_LOCAL_BIT_SIZE	0x3UL
+#define CC_HOST_BOOT_DSCRPTR_EXISTS_LOCAL_BIT_SHIFT	0x9UL
+#define CC_HOST_BOOT_DSCRPTR_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_PAU_EXISTS_LOCAL_BIT_SHIFT	0xAUL
+#define CC_HOST_BOOT_PAU_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_RNG_EXISTS_LOCAL_BIT_SHIFT	0xBUL
+#define CC_HOST_BOOT_RNG_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_PKA_EXISTS_LOCAL_BIT_SHIFT	0xCUL
+#define CC_HOST_BOOT_PKA_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_RC4_EXISTS_LOCAL_BIT_SHIFT	0xDUL
+#define CC_HOST_BOOT_RC4_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_SHA_512_PRSNT_LOCAL_BIT_SHIFT	0xEUL
+#define CC_HOST_BOOT_SHA_512_PRSNT_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_SHA_256_PRSNT_LOCAL_BIT_SHIFT	0xFUL
+#define CC_HOST_BOOT_SHA_256_PRSNT_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_MD5_PRSNT_LOCAL_BIT_SHIFT	0x10UL
+#define CC_HOST_BOOT_MD5_PRSNT_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_HASH_EXISTS_LOCAL_BIT_SHIFT	0x11UL
+#define CC_HOST_BOOT_HASH_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_C2_EXISTS_LOCAL_BIT_SHIFT	0x12UL
+#define CC_HOST_BOOT_C2_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_DES_EXISTS_LOCAL_BIT_SHIFT	0x13UL
+#define CC_HOST_BOOT_DES_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_XCBC_MAC_EXISTS_LOCAL_BIT_SHIFT	0x14UL
+#define CC_HOST_BOOT_AES_XCBC_MAC_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_CMAC_EXISTS_LOCAL_BIT_SHIFT	0x15UL
+#define CC_HOST_BOOT_AES_CMAC_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_CCM_EXISTS_LOCAL_BIT_SHIFT	0x16UL
+#define CC_HOST_BOOT_AES_CCM_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_XEX_HW_T_CALC_LOCAL_BIT_SHIFT	0x17UL
+#define CC_HOST_BOOT_AES_XEX_HW_T_CALC_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_XEX_EXISTS_LOCAL_BIT_SHIFT	0x18UL
+#define CC_HOST_BOOT_AES_XEX_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_CTR_EXISTS_LOCAL_BIT_SHIFT	0x19UL
+#define CC_HOST_BOOT_CTR_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_DIN_BYTE_RESOLUTION_LOCAL_BIT_SHIFT	0x1AUL
+#define CC_HOST_BOOT_AES_DIN_BYTE_RESOLUTION_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_TUNNELING_ENB_LOCAL_BIT_SHIFT	0x1BUL
+#define CC_HOST_BOOT_TUNNELING_ENB_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_SUPPORT_256_192_KEY_LOCAL_BIT_SHIFT	0x1CUL
+#define CC_HOST_BOOT_SUPPORT_256_192_KEY_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_ONLY_ENCRYPT_LOCAL_BIT_SHIFT	0x1DUL
+#define CC_HOST_BOOT_ONLY_ENCRYPT_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_BOOT_AES_EXISTS_LOCAL_BIT_SHIFT	0x1EUL
+#define CC_HOST_BOOT_AES_EXISTS_LOCAL_BIT_SIZE	0x1UL
+#define CC_HOST_VERSION_REG_OFFSET	0xA40UL
+#define CC_HOST_VERSION_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_VERSION_VALUE_BIT_SIZE	0x20UL
+#define CC_HOST_KFDE0_VALID_REG_OFFSET	0xA60UL
+#define CC_HOST_KFDE0_VALID_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_KFDE0_VALID_VALUE_BIT_SIZE	0x1UL
+#define CC_HOST_KFDE1_VALID_REG_OFFSET	0xA64UL
+#define CC_HOST_KFDE1_VALID_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_KFDE1_VALID_VALUE_BIT_SIZE	0x1UL
+#define CC_HOST_KFDE2_VALID_REG_OFFSET	0xA68UL
+#define CC_HOST_KFDE2_VALID_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_KFDE2_VALID_VALUE_BIT_SIZE	0x1UL
+#define CC_HOST_KFDE3_VALID_REG_OFFSET	0xA6CUL
+#define CC_HOST_KFDE3_VALID_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_KFDE3_VALID_VALUE_BIT_SIZE	0x1UL
+#define CC_HOST_GPR0_REG_OFFSET	0xA70UL
+#define CC_HOST_GPR0_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_GPR0_VALUE_BIT_SIZE	0x20UL
+#define CC_GPR_HOST_REG_OFFSET	0xA74UL
+#define CC_GPR_HOST_VALUE_BIT_SHIFT	0x0UL
+#define CC_GPR_HOST_VALUE_BIT_SIZE	0x20UL
+#define CC_HOST_POWER_DOWN_EN_REG_OFFSET	0xA78UL
+#define CC_HOST_POWER_DOWN_EN_VALUE_BIT_SHIFT	0x0UL
+#define CC_HOST_POWER_DOWN_EN_VALUE_BIT_SIZE	0x1UL
+// --------------------------------------
+// BLOCK: HOST_SRAM
+// --------------------------------------
+#define CC_SRAM_DATA_REG_OFFSET	0xF00UL
+#define CC_SRAM_DATA_VALUE_BIT_SHIFT	0x0UL
+#define CC_SRAM_DATA_VALUE_BIT_SIZE	0x20UL
+#define CC_SRAM_ADDR_REG_OFFSET	0xF04UL
+#define CC_SRAM_ADDR_VALUE_BIT_SHIFT	0x0UL
+#define CC_SRAM_ADDR_VALUE_BIT_SIZE	0xFUL
+#define CC_SRAM_DATA_READY_REG_OFFSET	0xF08UL
+#define CC_SRAM_DATA_READY_VALUE_BIT_SHIFT	0x0UL
+#define CC_SRAM_DATA_READY_VALUE_BIT_SIZE	0x1UL
+
+#endif //__CC_HOST_H__
diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h
new file mode 100644
index 000000000000..a091ae57f902
--- /dev/null
+++ b/drivers/crypto/ccree/cc_hw_queue_defs.h
@@ -0,0 +1,576 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_HW_QUEUE_DEFS_H__
+#define __CC_HW_QUEUE_DEFS_H__
+
+#include <linux/types.h>
+
+#include "cc_kernel_regs.h"
+#include <linux/bitfield.h>
+
+/******************************************************************************
+ *				DEFINITIONS
+ ******************************************************************************/
+
+#define HW_DESC_SIZE_WORDS		6
+/* Define max. available slots in HW queue */
+#define HW_QUEUE_SLOTS_MAX              15
+
+#define CC_REG_LOW(word, name)  \
+	(CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SHIFT)
+
+#define CC_REG_HIGH(word, name) \
+	(CC_REG_LOW(word, name) + \
+	 CC_DSCRPTR_QUEUE_WORD ## word ## _ ## name ## _BIT_SIZE - 1)
+
+#define CC_GENMASK(word, name) \
+	GENMASK(CC_REG_HIGH(word, name), CC_REG_LOW(word, name))
+
+#define WORD0_VALUE		CC_GENMASK(0, VALUE)
+#define WORD1_DIN_CONST_VALUE	CC_GENMASK(1, DIN_CONST_VALUE)
+#define WORD1_DIN_DMA_MODE	CC_GENMASK(1, DIN_DMA_MODE)
+#define WORD1_DIN_SIZE		CC_GENMASK(1, DIN_SIZE)
+#define WORD1_NOT_LAST		CC_GENMASK(1, NOT_LAST)
+#define WORD1_NS_BIT		CC_GENMASK(1, NS_BIT)
+#define WORD2_VALUE		CC_GENMASK(2, VALUE)
+#define WORD3_DOUT_DMA_MODE	CC_GENMASK(3, DOUT_DMA_MODE)
+#define WORD3_DOUT_LAST_IND	CC_GENMASK(3, DOUT_LAST_IND)
+#define WORD3_DOUT_SIZE		CC_GENMASK(3, DOUT_SIZE)
+#define WORD3_HASH_XOR_BIT	CC_GENMASK(3, HASH_XOR_BIT)
+#define WORD3_NS_BIT		CC_GENMASK(3, NS_BIT)
+#define WORD3_QUEUE_LAST_IND	CC_GENMASK(3, QUEUE_LAST_IND)
+#define WORD4_ACK_NEEDED	CC_GENMASK(4, ACK_NEEDED)
+#define WORD4_AES_SEL_N_HASH	CC_GENMASK(4, AES_SEL_N_HASH)
+#define WORD4_BYTES_SWAP	CC_GENMASK(4, BYTES_SWAP)
+#define WORD4_CIPHER_CONF0	CC_GENMASK(4, CIPHER_CONF0)
+#define WORD4_CIPHER_CONF1	CC_GENMASK(4, CIPHER_CONF1)
+#define WORD4_CIPHER_CONF2	CC_GENMASK(4, CIPHER_CONF2)
+#define WORD4_CIPHER_DO		CC_GENMASK(4, CIPHER_DO)
+#define WORD4_CIPHER_MODE	CC_GENMASK(4, CIPHER_MODE)
+#define WORD4_CMAC_SIZE0	CC_GENMASK(4, CMAC_SIZE0)
+#define WORD4_DATA_FLOW_MODE	CC_GENMASK(4, DATA_FLOW_MODE)
+#define WORD4_KEY_SIZE		CC_GENMASK(4, KEY_SIZE)
+#define WORD4_SETUP_OPERATION	CC_GENMASK(4, SETUP_OPERATION)
+#define WORD5_DIN_ADDR_HIGH	CC_GENMASK(5, DIN_ADDR_HIGH)
+#define WORD5_DOUT_ADDR_HIGH	CC_GENMASK(5, DOUT_ADDR_HIGH)
+
+/******************************************************************************
+ *				TYPE DEFINITIONS
+ ******************************************************************************/
+
+struct cc_hw_desc {
+	union {
+		u32 word[HW_DESC_SIZE_WORDS];
+		u16 hword[HW_DESC_SIZE_WORDS * 2];
+	};
+};
+
+enum cc_axi_sec {
+	AXI_SECURE = 0,
+	AXI_NOT_SECURE = 1
+};
+
+enum cc_desc_direction {
+	DESC_DIRECTION_ILLEGAL = -1,
+	DESC_DIRECTION_ENCRYPT_ENCRYPT = 0,
+	DESC_DIRECTION_DECRYPT_DECRYPT = 1,
+	DESC_DIRECTION_DECRYPT_ENCRYPT = 3,
+	DESC_DIRECTION_END = S32_MAX,
+};
+
+enum cc_dma_mode {
+	DMA_MODE_NULL		= -1,
+	NO_DMA			= 0,
+	DMA_SRAM		= 1,
+	DMA_DLLI		= 2,
+	DMA_MLLI		= 3,
+	DMA_MODE_END		= S32_MAX,
+};
+
+enum cc_flow_mode {
+	FLOW_MODE_NULL		= -1,
+	/* data flows */
+	BYPASS			= 0,
+	DIN_AES_DOUT		= 1,
+	AES_to_HASH		= 2,
+	AES_and_HASH		= 3,
+	DIN_DES_DOUT		= 4,
+	DES_to_HASH		= 5,
+	DES_and_HASH		= 6,
+	DIN_HASH		= 7,
+	DIN_HASH_and_BYPASS	= 8,
+	AESMAC_and_BYPASS	= 9,
+	AES_to_HASH_and_DOUT	= 10,
+	DIN_RC4_DOUT		= 11,
+	DES_to_HASH_and_DOUT	= 12,
+	AES_to_AES_to_HASH_and_DOUT	= 13,
+	AES_to_AES_to_HASH	= 14,
+	AES_to_HASH_and_AES	= 15,
+	DIN_AES_AESMAC		= 17,
+	HASH_to_DOUT		= 18,
+	/* setup flows */
+	S_DIN_to_AES		= 32,
+	S_DIN_to_AES2		= 33,
+	S_DIN_to_DES		= 34,
+	S_DIN_to_RC4		= 35,
+	S_DIN_to_HASH		= 37,
+	S_AES_to_DOUT		= 38,
+	S_AES2_to_DOUT		= 39,
+	S_RC4_to_DOUT		= 41,
+	S_DES_to_DOUT		= 42,
+	S_HASH_to_DOUT		= 43,
+	SET_FLOW_ID		= 44,
+	FLOW_MODE_END = S32_MAX,
+};
+
+enum cc_setup_op {
+	SETUP_LOAD_NOP		= 0,
+	SETUP_LOAD_STATE0	= 1,
+	SETUP_LOAD_STATE1	= 2,
+	SETUP_LOAD_STATE2	= 3,
+	SETUP_LOAD_KEY0		= 4,
+	SETUP_LOAD_XEX_KEY	= 5,
+	SETUP_WRITE_STATE0	= 8,
+	SETUP_WRITE_STATE1	= 9,
+	SETUP_WRITE_STATE2	= 10,
+	SETUP_WRITE_STATE3	= 11,
+	SETUP_OP_END = S32_MAX,
+};
+
+enum cc_hash_conf_pad {
+	HASH_PADDING_DISABLED = 0,
+	HASH_PADDING_ENABLED = 1,
+	HASH_DIGEST_RESULT_LITTLE_ENDIAN = 2,
+	HASH_CONFIG1_PADDING_RESERVE32 = S32_MAX,
+};
+
+enum cc_aes_mac_selector {
+	AES_SK = 1,
+	AES_CMAC_INIT = 2,
+	AES_CMAC_SIZE0 = 3,
+	AES_MAC_END = S32_MAX,
+};
+
+#define HW_KEY_MASK_CIPHER_DO	  0x3
+#define HW_KEY_SHIFT_CIPHER_CFG2  2
+
+/* HwCryptoKey[1:0] is mapped to cipher_do[1:0] */
+/* HwCryptoKey[2:3] is mapped to cipher_config2[1:0] */
+enum cc_hw_crypto_key {
+	USER_KEY = 0,			/* 0x0000 */
+	ROOT_KEY = 1,			/* 0x0001 */
+	PROVISIONING_KEY = 2,		/* 0x0010 */ /* ==KCP */
+	SESSION_KEY = 3,		/* 0x0011 */
+	RESERVED_KEY = 4,		/* NA */
+	PLATFORM_KEY = 5,		/* 0x0101 */
+	CUSTOMER_KEY = 6,		/* 0x0110 */
+	KFDE0_KEY = 7,			/* 0x0111 */
+	KFDE1_KEY = 9,			/* 0x1001 */
+	KFDE2_KEY = 10,			/* 0x1010 */
+	KFDE3_KEY = 11,			/* 0x1011 */
+	END_OF_KEYS = S32_MAX,
+};
+
+enum cc_hw_aes_key_size {
+	AES_128_KEY = 0,
+	AES_192_KEY = 1,
+	AES_256_KEY = 2,
+	END_OF_AES_KEYS = S32_MAX,
+};
+
+enum cc_hash_cipher_pad {
+	DO_NOT_PAD = 0,
+	DO_PAD = 1,
+	HASH_CIPHER_DO_PADDING_RESERVE32 = S32_MAX,
+};
+
+/*****************************/
+/* Descriptor packing macros */
+/*****************************/
+
+/*
+ * Init a HW descriptor struct
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void hw_desc_init(struct cc_hw_desc *pdesc)
+{
+	memset(pdesc, 0, sizeof(struct cc_hw_desc));
+}
+
+/*
+ * Indicates the end of current HW descriptors flow and release the HW engines.
+ *
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void set_queue_last_ind_bit(struct cc_hw_desc *pdesc)
+{
+	pdesc->word[3] |= FIELD_PREP(WORD3_QUEUE_LAST_IND, 1);
+}
+
+/*
+ * Set the DIN field of a HW descriptors
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @dma_mode: dmaMode The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT
+ * @addr: dinAdr DIN address
+ * @size: Data size in bytes
+ * @axi_sec: AXI secure bit
+ */
+static inline void set_din_type(struct cc_hw_desc *pdesc,
+				enum cc_dma_mode dma_mode, dma_addr_t addr,
+				u32 size, enum cc_axi_sec axi_sec)
+{
+	pdesc->word[0] = (u32)addr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	pdesc->word[5] |= FIELD_PREP(WORD5_DIN_ADDR_HIGH, ((u16)(addr >> 32)));
+#endif
+	pdesc->word[1] |= FIELD_PREP(WORD1_DIN_DMA_MODE, dma_mode) |
+				FIELD_PREP(WORD1_DIN_SIZE, size) |
+				FIELD_PREP(WORD1_NS_BIT, axi_sec);
+}
+
+/*
+ * Set the DIN field of a HW descriptors to NO DMA mode.
+ * Used for NOP descriptor, register patches and other special modes.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @addr: DIN address
+ * @size: Data size in bytes
+ */
+static inline void set_din_no_dma(struct cc_hw_desc *pdesc, u32 addr, u32 size)
+{
+	pdesc->word[0] = addr;
+	pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size);
+}
+
+/*
+ * Set the DIN field of a HW descriptors to SRAM mode.
+ * Note: No need to check SRAM alignment since host requests do not use SRAM and
+ * adaptor will enforce alignment check.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @addr: DIN address
+ * @size Data size in bytes
+ */
+static inline void set_din_sram(struct cc_hw_desc *pdesc, dma_addr_t addr,
+				u32 size)
+{
+	pdesc->word[0] = (u32)addr;
+	pdesc->word[1] |= FIELD_PREP(WORD1_DIN_SIZE, size) |
+				FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM);
+}
+
+/*
+ * Set the DIN field of a HW descriptors to CONST mode
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @val: DIN const value
+ * @size: Data size in bytes
+ */
+static inline void set_din_const(struct cc_hw_desc *pdesc, u32 val, u32 size)
+{
+	pdesc->word[0] = val;
+	pdesc->word[1] |= FIELD_PREP(WORD1_DIN_CONST_VALUE, 1) |
+			FIELD_PREP(WORD1_DIN_DMA_MODE, DMA_SRAM) |
+			FIELD_PREP(WORD1_DIN_SIZE, size);
+}
+
+/*
+ * Set the DIN not last input data indicator
+ *
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void set_din_not_last_indication(struct cc_hw_desc *pdesc)
+{
+	pdesc->word[1] |= FIELD_PREP(WORD1_NOT_LAST, 1);
+}
+
+/*
+ * Set the DOUT field of a HW descriptors
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @dma_mode: The DMA mode: NO_DMA, SRAM, DLLI, MLLI, CONSTANT
+ * @addr: DOUT address
+ * @size: Data size in bytes
+ * @axi_sec: AXI secure bit
+ */
+static inline void set_dout_type(struct cc_hw_desc *pdesc,
+				 enum cc_dma_mode dma_mode, dma_addr_t addr,
+				 u32 size, enum cc_axi_sec axi_sec)
+{
+	pdesc->word[2] = (u32)addr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	pdesc->word[5] |= FIELD_PREP(WORD5_DOUT_ADDR_HIGH, ((u16)(addr >> 32)));
+#endif
+	pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, dma_mode) |
+				FIELD_PREP(WORD3_DOUT_SIZE, size) |
+				FIELD_PREP(WORD3_NS_BIT, axi_sec);
+}
+
+/*
+ * Set the DOUT field of a HW descriptors to DLLI type
+ * The LAST INDICATION is provided by the user
+ *
+ * @pdesc pointer HW descriptor struct
+ * @addr: DOUT address
+ * @size: Data size in bytes
+ * @last_ind: The last indication bit
+ * @axi_sec: AXI secure bit
+ */
+static inline void set_dout_dlli(struct cc_hw_desc *pdesc, dma_addr_t addr,
+				 u32 size, enum cc_axi_sec axi_sec,
+				 u32 last_ind)
+{
+	set_dout_type(pdesc, DMA_DLLI, addr, size, axi_sec);
+	pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind);
+}
+
+/*
+ * Set the DOUT field of a HW descriptors to DLLI type
+ * The LAST INDICATION is provided by the user
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @addr: DOUT address
+ * @size: Data size in bytes
+ * @last_ind: The last indication bit
+ * @axi_sec: AXI secure bit
+ */
+static inline void set_dout_mlli(struct cc_hw_desc *pdesc, dma_addr_t addr,
+				 u32 size, enum cc_axi_sec axi_sec,
+				 bool last_ind)
+{
+	set_dout_type(pdesc, DMA_MLLI, addr, size, axi_sec);
+	pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_LAST_IND, last_ind);
+}
+
+/*
+ * Set the DOUT field of a HW descriptors to NO DMA mode.
+ * Used for NOP descriptor, register patches and other special modes.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @addr: DOUT address
+ * @size: Data size in bytes
+ * @write_enable: Enables a write operation to a register
+ */
+static inline void set_dout_no_dma(struct cc_hw_desc *pdesc, u32 addr,
+				   u32 size, bool write_enable)
+{
+	pdesc->word[2] = addr;
+	pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_SIZE, size) |
+			FIELD_PREP(WORD3_DOUT_LAST_IND, write_enable);
+}
+
+/*
+ * Set the word for the XOR operation.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @val: xor data value
+ */
+static inline void set_xor_val(struct cc_hw_desc *pdesc, u32 val)
+{
+	pdesc->word[2] = val;
+}
+
+/*
+ * Sets the XOR indicator bit in the descriptor
+ *
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void set_xor_active(struct cc_hw_desc *pdesc)
+{
+	pdesc->word[3] |= FIELD_PREP(WORD3_HASH_XOR_BIT, 1);
+}
+
+/*
+ * Select the AES engine instead of HASH engine when setting up combined mode
+ * with AES XCBC MAC
+ *
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void set_aes_not_hash_mode(struct cc_hw_desc *pdesc)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_AES_SEL_N_HASH, 1);
+}
+
+/*
+ * Set the DOUT field of a HW descriptors to SRAM mode
+ * Note: No need to check SRAM alignment since host requests do not use SRAM and
+ * adaptor will enforce alignment check.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @addr: DOUT address
+ * @size: Data size in bytes
+ */
+static inline void set_dout_sram(struct cc_hw_desc *pdesc, u32 addr, u32 size)
+{
+	pdesc->word[2] = addr;
+	pdesc->word[3] |= FIELD_PREP(WORD3_DOUT_DMA_MODE, DMA_SRAM) |
+			FIELD_PREP(WORD3_DOUT_SIZE, size);
+}
+
+/*
+ * Sets the data unit size for XEX mode in data_out_addr[15:0]
+ *
+ * @pdesc: pDesc pointer HW descriptor struct
+ * @size: data unit size for XEX mode
+ */
+static inline void set_xex_data_unit_size(struct cc_hw_desc *pdesc, u32 size)
+{
+	pdesc->word[2] = size;
+}
+
+/*
+ * Set the number of rounds for Multi2 in data_out_addr[15:0]
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @num: number of rounds for Multi2
+ */
+static inline void set_multi2_num_rounds(struct cc_hw_desc *pdesc, u32 num)
+{
+	pdesc->word[2] = num;
+}
+
+/*
+ * Set the flow mode.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @mode: Any one of the modes defined in [CC7x-DESC]
+ */
+static inline void set_flow_mode(struct cc_hw_desc *pdesc,
+				 enum cc_flow_mode mode)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_DATA_FLOW_MODE, mode);
+}
+
+/*
+ * Set the cipher mode.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @mode:  Any one of the modes defined in [CC7x-DESC]
+ */
+static inline void set_cipher_mode(struct cc_hw_desc *pdesc,
+				   enum drv_cipher_mode mode)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode);
+}
+
+/*
+ * Set the cipher configuration fields.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @mode: Any one of the modes defined in [CC7x-DESC]
+ */
+static inline void set_cipher_config0(struct cc_hw_desc *pdesc,
+				      enum drv_crypto_direction mode)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode);
+}
+
+/*
+ * Set the cipher configuration fields.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @config: Any one of the modes defined in [CC7x-DESC]
+ */
+static inline void set_cipher_config1(struct cc_hw_desc *pdesc,
+				      enum cc_hash_conf_pad config)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF1, config);
+}
+
+/*
+ * Set HW key configuration fields.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @hw_key: The HW key slot asdefined in enum cc_hw_crypto_key
+ */
+static inline void set_hw_crypto_key(struct cc_hw_desc *pdesc,
+				     enum cc_hw_crypto_key hw_key)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_DO,
+				     (hw_key & HW_KEY_MASK_CIPHER_DO)) |
+			FIELD_PREP(WORD4_CIPHER_CONF2,
+				   (hw_key >> HW_KEY_SHIFT_CIPHER_CFG2));
+}
+
+/*
+ * Set byte order of all setup-finalize descriptors.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @config: Any one of the modes defined in [CC7x-DESC]
+ */
+static inline void set_bytes_swap(struct cc_hw_desc *pdesc, bool config)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_BYTES_SWAP, config);
+}
+
+/*
+ * Set CMAC_SIZE0 mode.
+ *
+ * @pdesc: pointer HW descriptor struct
+ */
+static inline void set_cmac_size0_mode(struct cc_hw_desc *pdesc)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CMAC_SIZE0, 1);
+}
+
+/*
+ * Set key size descriptor field.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @size: key size in bytes (NOT size code)
+ */
+static inline void set_key_size(struct cc_hw_desc *pdesc, u32 size)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_KEY_SIZE, size);
+}
+
+/*
+ * Set AES key size.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @size: key size in bytes (NOT size code)
+ */
+static inline void set_key_size_aes(struct cc_hw_desc *pdesc, u32 size)
+{
+	set_key_size(pdesc, ((size >> 3) - 2));
+}
+
+/*
+ * Set DES key size.
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @size: key size in bytes (NOT size code)
+ */
+static inline void set_key_size_des(struct cc_hw_desc *pdesc, u32 size)
+{
+	set_key_size(pdesc, ((size >> 3) - 1));
+}
+
+/*
+ * Set the descriptor setup mode
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @mode: Any one of the setup modes defined in [CC7x-DESC]
+ */
+static inline void set_setup_mode(struct cc_hw_desc *pdesc,
+				  enum cc_setup_op mode)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_SETUP_OPERATION, mode);
+}
+
+/*
+ * Set the descriptor cipher DO
+ *
+ * @pdesc: pointer HW descriptor struct
+ * @config: Any one of the cipher do defined in [CC7x-DESC]
+ */
+static inline void set_cipher_do(struct cc_hw_desc *pdesc,
+				 enum cc_hash_cipher_pad config)
+{
+	pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_DO,
+				(config & HW_KEY_MASK_CIPHER_DO));
+}
+
+#endif /*__CC_HW_QUEUE_DEFS_H__*/
diff --git a/drivers/crypto/ccree/cc_ivgen.c b/drivers/crypto/ccree/cc_ivgen.c
new file mode 100644
index 000000000000..769458323394
--- /dev/null
+++ b/drivers/crypto/ccree/cc_ivgen.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <crypto/ctr.h>
+#include "cc_driver.h"
+#include "cc_ivgen.h"
+#include "cc_request_mgr.h"
+#include "cc_sram_mgr.h"
+#include "cc_buffer_mgr.h"
+
+/* The max. size of pool *MUST* be <= SRAM total size */
+#define CC_IVPOOL_SIZE 1024
+/* The first 32B fraction of pool are dedicated to the
+ * next encryption "key" & "IV" for pool regeneration
+ */
+#define CC_IVPOOL_META_SIZE (CC_AES_IV_SIZE + AES_KEYSIZE_128)
+#define CC_IVPOOL_GEN_SEQ_LEN	4
+
+/**
+ * struct cc_ivgen_ctx -IV pool generation context
+ * @pool:          the start address of the iv-pool resides in internal RAM
+ * @ctr_key_dma:   address of pool's encryption key material in internal RAM
+ * @ctr_iv_dma:    address of pool's counter iv in internal RAM
+ * @next_iv_ofs:   the offset to the next available IV in pool
+ * @pool_meta:     virt. address of the initial enc. key/IV
+ * @pool_meta_dma: phys. address of the initial enc. key/IV
+ */
+struct cc_ivgen_ctx {
+	cc_sram_addr_t pool;
+	cc_sram_addr_t ctr_key;
+	cc_sram_addr_t ctr_iv;
+	u32 next_iv_ofs;
+	u8 *pool_meta;
+	dma_addr_t pool_meta_dma;
+};
+
+/*!
+ * Generates CC_IVPOOL_SIZE of random bytes by
+ * encrypting 0's using AES128-CTR.
+ *
+ * \param ivgen iv-pool context
+ * \param iv_seq IN/OUT array to the descriptors sequence
+ * \param iv_seq_len IN/OUT pointer to the sequence length
+ */
+static int cc_gen_iv_pool(struct cc_ivgen_ctx *ivgen_ctx,
+			  struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len)
+{
+	unsigned int idx = *iv_seq_len;
+
+	if ((*iv_seq_len + CC_IVPOOL_GEN_SEQ_LEN) > CC_IVPOOL_SEQ_LEN) {
+		/* The sequence will be longer than allowed */
+		return -EINVAL;
+	}
+	/* Setup key */
+	hw_desc_init(&iv_seq[idx]);
+	set_din_sram(&iv_seq[idx], ivgen_ctx->ctr_key, AES_KEYSIZE_128);
+	set_setup_mode(&iv_seq[idx], SETUP_LOAD_KEY0);
+	set_cipher_config0(&iv_seq[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_flow_mode(&iv_seq[idx], S_DIN_to_AES);
+	set_key_size_aes(&iv_seq[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_cipher_mode(&iv_seq[idx], DRV_CIPHER_CTR);
+	idx++;
+
+	/* Setup cipher state */
+	hw_desc_init(&iv_seq[idx]);
+	set_din_sram(&iv_seq[idx], ivgen_ctx->ctr_iv, CC_AES_IV_SIZE);
+	set_cipher_config0(&iv_seq[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
+	set_flow_mode(&iv_seq[idx], S_DIN_to_AES);
+	set_setup_mode(&iv_seq[idx], SETUP_LOAD_STATE1);
+	set_key_size_aes(&iv_seq[idx], CC_AES_128_BIT_KEY_SIZE);
+	set_cipher_mode(&iv_seq[idx], DRV_CIPHER_CTR);
+	idx++;
+
+	/* Perform dummy encrypt to skip first block */
+	hw_desc_init(&iv_seq[idx]);
+	set_din_const(&iv_seq[idx], 0, CC_AES_IV_SIZE);
+	set_dout_sram(&iv_seq[idx], ivgen_ctx->pool, CC_AES_IV_SIZE);
+	set_flow_mode(&iv_seq[idx], DIN_AES_DOUT);
+	idx++;
+
+	/* Generate IV pool */
+	hw_desc_init(&iv_seq[idx]);
+	set_din_const(&iv_seq[idx], 0, CC_IVPOOL_SIZE);
+	set_dout_sram(&iv_seq[idx], ivgen_ctx->pool, CC_IVPOOL_SIZE);
+	set_flow_mode(&iv_seq[idx], DIN_AES_DOUT);
+	idx++;
+
+	*iv_seq_len = idx; /* Update sequence length */
+
+	/* queue ordering assures pool readiness */
+	ivgen_ctx->next_iv_ofs = CC_IVPOOL_META_SIZE;
+
+	return 0;
+}
+
+/*!
+ * Generates the initial pool in SRAM.
+ * This function should be invoked when resuming driver.
+ *
+ * \param drvdata
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_init_iv_sram(struct cc_drvdata *drvdata)
+{
+	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
+	struct cc_hw_desc iv_seq[CC_IVPOOL_SEQ_LEN];
+	unsigned int iv_seq_len = 0;
+	int rc;
+
+	/* Generate initial enc. key/iv */
+	get_random_bytes(ivgen_ctx->pool_meta, CC_IVPOOL_META_SIZE);
+
+	/* The first 32B reserved for the enc. Key/IV */
+	ivgen_ctx->ctr_key = ivgen_ctx->pool;
+	ivgen_ctx->ctr_iv = ivgen_ctx->pool + AES_KEYSIZE_128;
+
+	/* Copy initial enc. key and IV to SRAM at a single descriptor */
+	hw_desc_init(&iv_seq[iv_seq_len]);
+	set_din_type(&iv_seq[iv_seq_len], DMA_DLLI, ivgen_ctx->pool_meta_dma,
+		     CC_IVPOOL_META_SIZE, NS_BIT);
+	set_dout_sram(&iv_seq[iv_seq_len], ivgen_ctx->pool,
+		      CC_IVPOOL_META_SIZE);
+	set_flow_mode(&iv_seq[iv_seq_len], BYPASS);
+	iv_seq_len++;
+
+	/* Generate initial pool */
+	rc = cc_gen_iv_pool(ivgen_ctx, iv_seq, &iv_seq_len);
+	if (rc)
+		return rc;
+
+	/* Fire-and-forget */
+	return send_request_init(drvdata, iv_seq, iv_seq_len);
+}
+
+/*!
+ * Free iv-pool and ivgen context.
+ *
+ * \param drvdata
+ */
+void cc_ivgen_fini(struct cc_drvdata *drvdata)
+{
+	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
+	struct device *device = &drvdata->plat_dev->dev;
+
+	if (!ivgen_ctx)
+		return;
+
+	if (ivgen_ctx->pool_meta) {
+		memset(ivgen_ctx->pool_meta, 0, CC_IVPOOL_META_SIZE);
+		dma_free_coherent(device, CC_IVPOOL_META_SIZE,
+				  ivgen_ctx->pool_meta,
+				  ivgen_ctx->pool_meta_dma);
+	}
+
+	ivgen_ctx->pool = NULL_SRAM_ADDR;
+
+	/* release "this" context */
+	kfree(ivgen_ctx);
+}
+
+/*!
+ * Allocates iv-pool and maps resources.
+ * This function generates the first IV pool.
+ *
+ * \param drvdata Driver's private context
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_ivgen_init(struct cc_drvdata *drvdata)
+{
+	struct cc_ivgen_ctx *ivgen_ctx;
+	struct device *device = &drvdata->plat_dev->dev;
+	int rc;
+
+	/* Allocate "this" context */
+	ivgen_ctx = kzalloc(sizeof(*ivgen_ctx), GFP_KERNEL);
+	if (!ivgen_ctx)
+		return -ENOMEM;
+
+	/* Allocate pool's header for initial enc. key/IV */
+	ivgen_ctx->pool_meta = dma_alloc_coherent(device, CC_IVPOOL_META_SIZE,
+						  &ivgen_ctx->pool_meta_dma,
+						  GFP_KERNEL);
+	if (!ivgen_ctx->pool_meta) {
+		dev_err(device, "Not enough memory to allocate DMA of pool_meta (%u B)\n",
+			CC_IVPOOL_META_SIZE);
+		rc = -ENOMEM;
+		goto out;
+	}
+	/* Allocate IV pool in SRAM */
+	ivgen_ctx->pool = cc_sram_alloc(drvdata, CC_IVPOOL_SIZE);
+	if (ivgen_ctx->pool == NULL_SRAM_ADDR) {
+		dev_err(device, "SRAM pool exhausted\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	drvdata->ivgen_handle = ivgen_ctx;
+
+	return cc_init_iv_sram(drvdata);
+
+out:
+	cc_ivgen_fini(drvdata);
+	return rc;
+}
+
+/*!
+ * Acquires 16 Bytes IV from the iv-pool
+ *
+ * \param drvdata Driver private context
+ * \param iv_out_dma Array of physical IV out addresses
+ * \param iv_out_dma_len Length of iv_out_dma array (additional elements
+ *                       of iv_out_dma array are ignore)
+ * \param iv_out_size May be 8 or 16 bytes long
+ * \param iv_seq IN/OUT array to the descriptors sequence
+ * \param iv_seq_len IN/OUT pointer to the sequence length
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_get_iv(struct cc_drvdata *drvdata, dma_addr_t iv_out_dma[],
+	      unsigned int iv_out_dma_len, unsigned int iv_out_size,
+	      struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len)
+{
+	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
+	unsigned int idx = *iv_seq_len;
+	struct device *dev = drvdata_to_dev(drvdata);
+	unsigned int t;
+
+	if (iv_out_size != CC_AES_IV_SIZE &&
+	    iv_out_size != CTR_RFC3686_IV_SIZE) {
+		return -EINVAL;
+	}
+	if ((iv_out_dma_len + 1) > CC_IVPOOL_SEQ_LEN) {
+		/* The sequence will be longer than allowed */
+		return -EINVAL;
+	}
+
+	/* check that number of generated IV is limited to max dma address
+	 * iv buffer size
+	 */
+	if (iv_out_dma_len > CC_MAX_IVGEN_DMA_ADDRESSES) {
+		/* The sequence will be longer than allowed */
+		return -EINVAL;
+	}
+
+	for (t = 0; t < iv_out_dma_len; t++) {
+		/* Acquire IV from pool */
+		hw_desc_init(&iv_seq[idx]);
+		set_din_sram(&iv_seq[idx], (ivgen_ctx->pool +
+					    ivgen_ctx->next_iv_ofs),
+			     iv_out_size);
+		set_dout_dlli(&iv_seq[idx], iv_out_dma[t], iv_out_size,
+			      NS_BIT, 0);
+		set_flow_mode(&iv_seq[idx], BYPASS);
+		idx++;
+	}
+
+	/* Bypass operation is proceeded by crypto sequence, hence must
+	 *  assure bypass-write-transaction by a memory barrier
+	 */
+	hw_desc_init(&iv_seq[idx]);
+	set_din_no_dma(&iv_seq[idx], 0, 0xfffff0);
+	set_dout_no_dma(&iv_seq[idx], 0, 0, 1);
+	idx++;
+
+	*iv_seq_len = idx; /* update seq length */
+
+	/* Update iv index */
+	ivgen_ctx->next_iv_ofs += iv_out_size;
+
+	if ((CC_IVPOOL_SIZE - ivgen_ctx->next_iv_ofs) < CC_AES_IV_SIZE) {
+		dev_dbg(dev, "Pool exhausted, regenerating iv-pool\n");
+		/* pool is drained -regenerate it! */
+		return cc_gen_iv_pool(ivgen_ctx, iv_seq, iv_seq_len);
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccree/cc_ivgen.h b/drivers/crypto/ccree/cc_ivgen.h
new file mode 100644
index 000000000000..b6ac16903dda
--- /dev/null
+++ b/drivers/crypto/ccree/cc_ivgen.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_IVGEN_H__
+#define __CC_IVGEN_H__
+
+#include "cc_hw_queue_defs.h"
+
+#define CC_IVPOOL_SEQ_LEN 8
+
+/*!
+ * Allocates iv-pool and maps resources.
+ * This function generates the first IV pool.
+ *
+ * \param drvdata Driver's private context
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_ivgen_init(struct cc_drvdata *drvdata);
+
+/*!
+ * Free iv-pool and ivgen context.
+ *
+ * \param drvdata
+ */
+void cc_ivgen_fini(struct cc_drvdata *drvdata);
+
+/*!
+ * Generates the initial pool in SRAM.
+ * This function should be invoked when resuming DX driver.
+ *
+ * \param drvdata
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_init_iv_sram(struct cc_drvdata *drvdata);
+
+/*!
+ * Acquires 16 Bytes IV from the iv-pool
+ *
+ * \param drvdata Driver private context
+ * \param iv_out_dma Array of physical IV out addresses
+ * \param iv_out_dma_len Length of iv_out_dma array (additional elements of
+ *                       iv_out_dma array are ignore)
+ * \param iv_out_size May be 8 or 16 bytes long
+ * \param iv_seq IN/OUT array to the descriptors sequence
+ * \param iv_seq_len IN/OUT pointer to the sequence length
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_get_iv(struct cc_drvdata *drvdata, dma_addr_t iv_out_dma[],
+	      unsigned int iv_out_dma_len, unsigned int iv_out_size,
+	      struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len);
+
+#endif /*__CC_IVGEN_H__*/
diff --git a/drivers/crypto/ccree/cc_kernel_regs.h b/drivers/crypto/ccree/cc_kernel_regs.h
new file mode 100644
index 000000000000..8d7262a35156
--- /dev/null
+++ b/drivers/crypto/ccree/cc_kernel_regs.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_CRYS_KERNEL_H__
+#define __CC_CRYS_KERNEL_H__
+
+// --------------------------------------
+// BLOCK: DSCRPTR
+// --------------------------------------
+#define CC_DSCRPTR_COMPLETION_COUNTER_REG_OFFSET	0xE00UL
+#define CC_DSCRPTR_COMPLETION_COUNTER_COMPLETION_COUNTER_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_COMPLETION_COUNTER_COMPLETION_COUNTER_BIT_SIZE	0x6UL
+#define CC_DSCRPTR_COMPLETION_COUNTER_OVERFLOW_COUNTER_BIT_SHIFT	0x6UL
+#define CC_DSCRPTR_COMPLETION_COUNTER_OVERFLOW_COUNTER_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_SW_RESET_REG_OFFSET	0xE40UL
+#define CC_DSCRPTR_SW_RESET_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_SW_RESET_VALUE_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_REG_OFFSET	0xE60UL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_NUM_OF_DSCRPTR_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_NUM_OF_DSCRPTR_BIT_SIZE	0xAUL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_DSCRPTR_SRAM_SIZE_BIT_SHIFT	0xAUL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_DSCRPTR_SRAM_SIZE_BIT_SIZE	0xCUL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_SRAM_SIZE_BIT_SHIFT	0x16UL
+#define CC_DSCRPTR_QUEUE_SRAM_SIZE_SRAM_SIZE_BIT_SIZE	0x3UL
+#define CC_DSCRPTR_SINGLE_ADDR_EN_REG_OFFSET	0xE64UL
+#define CC_DSCRPTR_SINGLE_ADDR_EN_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_SINGLE_ADDR_EN_VALUE_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_MEASURE_CNTR_REG_OFFSET	0xE68UL
+#define CC_DSCRPTR_MEASURE_CNTR_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_MEASURE_CNTR_VALUE_BIT_SIZE	0x20UL
+#define CC_DSCRPTR_QUEUE_WORD0_REG_OFFSET	0xE80UL
+#define CC_DSCRPTR_QUEUE_WORD0_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD0_VALUE_BIT_SIZE	0x20UL
+#define CC_DSCRPTR_QUEUE_WORD1_REG_OFFSET	0xE84UL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_DMA_MODE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_DMA_MODE_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_SIZE_BIT_SHIFT	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_SIZE_BIT_SIZE	0x18UL
+#define CC_DSCRPTR_QUEUE_WORD1_NS_BIT_BIT_SHIFT	0x1AUL
+#define CC_DSCRPTR_QUEUE_WORD1_NS_BIT_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_CONST_VALUE_BIT_SHIFT	0x1BUL
+#define CC_DSCRPTR_QUEUE_WORD1_DIN_CONST_VALUE_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD1_NOT_LAST_BIT_SHIFT	0x1CUL
+#define CC_DSCRPTR_QUEUE_WORD1_NOT_LAST_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD1_LOCK_QUEUE_BIT_SHIFT	0x1DUL
+#define CC_DSCRPTR_QUEUE_WORD1_LOCK_QUEUE_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD1_NOT_USED_BIT_SHIFT	0x1EUL
+#define CC_DSCRPTR_QUEUE_WORD1_NOT_USED_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD2_REG_OFFSET	0xE88UL
+#define CC_DSCRPTR_QUEUE_WORD2_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD2_VALUE_BIT_SIZE	0x20UL
+#define CC_DSCRPTR_QUEUE_WORD3_REG_OFFSET	0xE8CUL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_DMA_MODE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_DMA_MODE_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_SIZE_BIT_SHIFT	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_SIZE_BIT_SIZE	0x18UL
+#define CC_DSCRPTR_QUEUE_WORD3_NS_BIT_BIT_SHIFT	0x1AUL
+#define CC_DSCRPTR_QUEUE_WORD3_NS_BIT_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_LAST_IND_BIT_SHIFT	0x1BUL
+#define CC_DSCRPTR_QUEUE_WORD3_DOUT_LAST_IND_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD3_HASH_XOR_BIT_BIT_SHIFT	0x1DUL
+#define CC_DSCRPTR_QUEUE_WORD3_HASH_XOR_BIT_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD3_NOT_USED_BIT_SHIFT	0x1EUL
+#define CC_DSCRPTR_QUEUE_WORD3_NOT_USED_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD3_QUEUE_LAST_IND_BIT_SHIFT	0x1FUL
+#define CC_DSCRPTR_QUEUE_WORD3_QUEUE_LAST_IND_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_REG_OFFSET	0xE90UL
+#define CC_DSCRPTR_QUEUE_WORD4_DATA_FLOW_MODE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD4_DATA_FLOW_MODE_BIT_SIZE	0x6UL
+#define CC_DSCRPTR_QUEUE_WORD4_AES_SEL_N_HASH_BIT_SHIFT	0x6UL
+#define CC_DSCRPTR_QUEUE_WORD4_AES_SEL_N_HASH_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_AES_XOR_CRYPTO_KEY_BIT_SHIFT	0x7UL
+#define CC_DSCRPTR_QUEUE_WORD4_AES_XOR_CRYPTO_KEY_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_ACK_NEEDED_BIT_SHIFT	0x8UL
+#define CC_DSCRPTR_QUEUE_WORD4_ACK_NEEDED_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_MODE_BIT_SHIFT	0xAUL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_MODE_BIT_SIZE	0x4UL
+#define CC_DSCRPTR_QUEUE_WORD4_CMAC_SIZE0_BIT_SHIFT	0xEUL
+#define CC_DSCRPTR_QUEUE_WORD4_CMAC_SIZE0_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_DO_BIT_SHIFT	0xFUL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_DO_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF0_BIT_SHIFT	0x11UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF0_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF1_BIT_SHIFT	0x13UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF1_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF2_BIT_SHIFT	0x14UL
+#define CC_DSCRPTR_QUEUE_WORD4_CIPHER_CONF2_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD4_KEY_SIZE_BIT_SHIFT	0x16UL
+#define CC_DSCRPTR_QUEUE_WORD4_KEY_SIZE_BIT_SIZE	0x2UL
+#define CC_DSCRPTR_QUEUE_WORD4_SETUP_OPERATION_BIT_SHIFT	0x18UL
+#define CC_DSCRPTR_QUEUE_WORD4_SETUP_OPERATION_BIT_SIZE	0x4UL
+#define CC_DSCRPTR_QUEUE_WORD4_DIN_SRAM_ENDIANNESS_BIT_SHIFT	0x1CUL
+#define CC_DSCRPTR_QUEUE_WORD4_DIN_SRAM_ENDIANNESS_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_DOUT_SRAM_ENDIANNESS_BIT_SHIFT	0x1DUL
+#define CC_DSCRPTR_QUEUE_WORD4_DOUT_SRAM_ENDIANNESS_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_WORD_SWAP_BIT_SHIFT	0x1EUL
+#define CC_DSCRPTR_QUEUE_WORD4_WORD_SWAP_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD4_BYTES_SWAP_BIT_SHIFT	0x1FUL
+#define CC_DSCRPTR_QUEUE_WORD4_BYTES_SWAP_BIT_SIZE	0x1UL
+#define CC_DSCRPTR_QUEUE_WORD5_REG_OFFSET	0xE94UL
+#define CC_DSCRPTR_QUEUE_WORD5_DIN_ADDR_HIGH_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WORD5_DIN_ADDR_HIGH_BIT_SIZE	0x10UL
+#define CC_DSCRPTR_QUEUE_WORD5_DOUT_ADDR_HIGH_BIT_SHIFT	0x10UL
+#define CC_DSCRPTR_QUEUE_WORD5_DOUT_ADDR_HIGH_BIT_SIZE	0x10UL
+#define CC_DSCRPTR_QUEUE_WATERMARK_REG_OFFSET	0xE98UL
+#define CC_DSCRPTR_QUEUE_WATERMARK_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_WATERMARK_VALUE_BIT_SIZE	0xAUL
+#define CC_DSCRPTR_QUEUE_CONTENT_REG_OFFSET	0xE9CUL
+#define CC_DSCRPTR_QUEUE_CONTENT_VALUE_BIT_SHIFT	0x0UL
+#define CC_DSCRPTR_QUEUE_CONTENT_VALUE_BIT_SIZE	0xAUL
+// --------------------------------------
+// BLOCK: AXI_P
+// --------------------------------------
+#define CC_AXIM_MON_INFLIGHT_REG_OFFSET	0xB00UL
+#define CC_AXIM_MON_INFLIGHT_VALUE_BIT_SHIFT	0x0UL
+#define CC_AXIM_MON_INFLIGHT_VALUE_BIT_SIZE	0x8UL
+#define CC_AXIM_MON_INFLIGHTLAST_REG_OFFSET	0xB40UL
+#define CC_AXIM_MON_INFLIGHTLAST_VALUE_BIT_SHIFT	0x0UL
+#define CC_AXIM_MON_INFLIGHTLAST_VALUE_BIT_SIZE	0x8UL
+#define CC_AXIM_MON_COMP_REG_OFFSET	0xB80UL
+#define CC_AXIM_MON_COMP8_REG_OFFSET	0xBA0UL
+#define CC_AXIM_MON_COMP_VALUE_BIT_SHIFT	0x0UL
+#define CC_AXIM_MON_COMP_VALUE_BIT_SIZE	0x10UL
+#define CC_AXIM_MON_ERR_REG_OFFSET	0xBC4UL
+#define CC_AXIM_MON_ERR_BRESP_BIT_SHIFT	0x0UL
+#define CC_AXIM_MON_ERR_BRESP_BIT_SIZE	0x2UL
+#define CC_AXIM_MON_ERR_BID_BIT_SHIFT	0x2UL
+#define CC_AXIM_MON_ERR_BID_BIT_SIZE	0x4UL
+#define CC_AXIM_MON_ERR_RRESP_BIT_SHIFT	0x10UL
+#define CC_AXIM_MON_ERR_RRESP_BIT_SIZE	0x2UL
+#define CC_AXIM_MON_ERR_RID_BIT_SHIFT	0x12UL
+#define CC_AXIM_MON_ERR_RID_BIT_SIZE	0x4UL
+#define CC_AXIM_CFG_REG_OFFSET	0xBE8UL
+#define CC_AXIM_CFG_BRESPMASK_BIT_SHIFT	0x4UL
+#define CC_AXIM_CFG_BRESPMASK_BIT_SIZE	0x1UL
+#define CC_AXIM_CFG_RRESPMASK_BIT_SHIFT	0x5UL
+#define CC_AXIM_CFG_RRESPMASK_BIT_SIZE	0x1UL
+#define CC_AXIM_CFG_INFLTMASK_BIT_SHIFT	0x6UL
+#define CC_AXIM_CFG_INFLTMASK_BIT_SIZE	0x1UL
+#define CC_AXIM_CFG_COMPMASK_BIT_SHIFT	0x7UL
+#define CC_AXIM_CFG_COMPMASK_BIT_SIZE	0x1UL
+#define CC_AXIM_ACE_CONST_REG_OFFSET	0xBECUL
+#define CC_AXIM_ACE_CONST_ARDOMAIN_BIT_SHIFT	0x0UL
+#define CC_AXIM_ACE_CONST_ARDOMAIN_BIT_SIZE	0x2UL
+#define CC_AXIM_ACE_CONST_AWDOMAIN_BIT_SHIFT	0x2UL
+#define CC_AXIM_ACE_CONST_AWDOMAIN_BIT_SIZE	0x2UL
+#define CC_AXIM_ACE_CONST_ARBAR_BIT_SHIFT	0x4UL
+#define CC_AXIM_ACE_CONST_ARBAR_BIT_SIZE	0x2UL
+#define CC_AXIM_ACE_CONST_AWBAR_BIT_SHIFT	0x6UL
+#define CC_AXIM_ACE_CONST_AWBAR_BIT_SIZE	0x2UL
+#define CC_AXIM_ACE_CONST_ARSNOOP_BIT_SHIFT	0x8UL
+#define CC_AXIM_ACE_CONST_ARSNOOP_BIT_SIZE	0x4UL
+#define CC_AXIM_ACE_CONST_AWSNOOP_NOT_ALIGNED_BIT_SHIFT	0xCUL
+#define CC_AXIM_ACE_CONST_AWSNOOP_NOT_ALIGNED_BIT_SIZE	0x3UL
+#define CC_AXIM_ACE_CONST_AWSNOOP_ALIGNED_BIT_SHIFT	0xFUL
+#define CC_AXIM_ACE_CONST_AWSNOOP_ALIGNED_BIT_SIZE	0x3UL
+#define CC_AXIM_ACE_CONST_AWADDR_NOT_MASKED_BIT_SHIFT	0x12UL
+#define CC_AXIM_ACE_CONST_AWADDR_NOT_MASKED_BIT_SIZE	0x7UL
+#define CC_AXIM_ACE_CONST_AWLEN_VAL_BIT_SHIFT	0x19UL
+#define CC_AXIM_ACE_CONST_AWLEN_VAL_BIT_SIZE	0x4UL
+#define CC_AXIM_CACHE_PARAMS_REG_OFFSET	0xBF0UL
+#define CC_AXIM_CACHE_PARAMS_AWCACHE_LAST_BIT_SHIFT	0x0UL
+#define CC_AXIM_CACHE_PARAMS_AWCACHE_LAST_BIT_SIZE	0x4UL
+#define CC_AXIM_CACHE_PARAMS_AWCACHE_BIT_SHIFT	0x4UL
+#define CC_AXIM_CACHE_PARAMS_AWCACHE_BIT_SIZE	0x4UL
+#define CC_AXIM_CACHE_PARAMS_ARCACHE_BIT_SHIFT	0x8UL
+#define CC_AXIM_CACHE_PARAMS_ARCACHE_BIT_SIZE	0x4UL
+#endif	// __CC_CRYS_KERNEL_H__
diff --git a/drivers/crypto/ccree/cc_lli_defs.h b/drivers/crypto/ccree/cc_lli_defs.h
new file mode 100644
index 000000000000..64b15ac9f1d3
--- /dev/null
+++ b/drivers/crypto/ccree/cc_lli_defs.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef _CC_LLI_DEFS_H_
+#define _CC_LLI_DEFS_H_
+
+#include <linux/types.h>
+
+/* Max DLLI size
+ *  AKA CC_DSCRPTR_QUEUE_WORD1_DIN_SIZE_BIT_SIZE
+ */
+#define DLLI_SIZE_BIT_SIZE	0x18
+
+#define CC_MAX_MLLI_ENTRY_SIZE 0xFFFF
+
+#define LLI_MAX_NUM_OF_DATA_ENTRIES 128
+#define LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES 4
+#define MLLI_TABLE_MIN_ALIGNMENT 4 /* 32 bit alignment */
+#define MAX_NUM_OF_BUFFERS_IN_MLLI 4
+#define MAX_NUM_OF_TOTAL_MLLI_ENTRIES \
+		(2 * LLI_MAX_NUM_OF_DATA_ENTRIES + \
+		 LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES)
+
+/* Size of entry */
+#define LLI_ENTRY_WORD_SIZE 2
+#define LLI_ENTRY_BYTE_SIZE (LLI_ENTRY_WORD_SIZE * sizeof(u32))
+
+/* Word0[31:0] = ADDR[31:0] */
+#define LLI_WORD0_OFFSET 0
+#define LLI_LADDR_BIT_OFFSET 0
+#define LLI_LADDR_BIT_SIZE 32
+/* Word1[31:16] = ADDR[47:32]; Word1[15:0] = SIZE */
+#define LLI_WORD1_OFFSET 1
+#define LLI_SIZE_BIT_OFFSET 0
+#define LLI_SIZE_BIT_SIZE 16
+#define LLI_HADDR_BIT_OFFSET 16
+#define LLI_HADDR_BIT_SIZE 16
+
+#define LLI_SIZE_MASK GENMASK((LLI_SIZE_BIT_SIZE - 1), LLI_SIZE_BIT_OFFSET)
+#define LLI_HADDR_MASK GENMASK( \
+			       (LLI_HADDR_BIT_OFFSET + LLI_HADDR_BIT_SIZE - 1),\
+				LLI_HADDR_BIT_OFFSET)
+
+static inline void cc_lli_set_addr(u32 *lli_p, dma_addr_t addr)
+{
+	lli_p[LLI_WORD0_OFFSET] = (addr & U32_MAX);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	lli_p[LLI_WORD1_OFFSET] &= ~LLI_HADDR_MASK;
+	lli_p[LLI_WORD1_OFFSET] |= FIELD_PREP(LLI_HADDR_MASK, (addr >> 32));
+#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
+}
+
+static inline void cc_lli_set_size(u32 *lli_p, u16 size)
+{
+	lli_p[LLI_WORD1_OFFSET] &= ~LLI_SIZE_MASK;
+	lli_p[LLI_WORD1_OFFSET] |= FIELD_PREP(LLI_SIZE_MASK, size);
+}
+
+#endif /*_CC_LLI_DEFS_H_*/
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
new file mode 100644
index 000000000000..d990f472e89f
--- /dev/null
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include "cc_driver.h"
+#include "cc_buffer_mgr.h"
+#include "cc_request_mgr.h"
+#include "cc_sram_mgr.h"
+#include "cc_ivgen.h"
+#include "cc_hash.h"
+#include "cc_pm.h"
+
+#define POWER_DOWN_ENABLE 0x01
+#define POWER_DOWN_DISABLE 0x00
+
+const struct dev_pm_ops ccree_pm = {
+	SET_RUNTIME_PM_OPS(cc_pm_suspend, cc_pm_resume, NULL)
+};
+
+int cc_pm_suspend(struct device *dev)
+{
+	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+	int rc;
+
+	dev_dbg(dev, "set HOST_POWER_DOWN_EN\n");
+	cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE);
+	rc = cc_suspend_req_queue(drvdata);
+	if (rc) {
+		dev_err(dev, "cc_suspend_req_queue (%x)\n", rc);
+		return rc;
+	}
+	fini_cc_regs(drvdata);
+	cc_clk_off(drvdata);
+	return 0;
+}
+
+int cc_pm_resume(struct device *dev)
+{
+	int rc;
+	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "unset HOST_POWER_DOWN_EN\n");
+	cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_DISABLE);
+
+	rc = cc_clk_on(drvdata);
+	if (rc) {
+		dev_err(dev, "failed getting clock back on. We're toast.\n");
+		return rc;
+	}
+
+	rc = init_cc_regs(drvdata, false);
+	if (rc) {
+		dev_err(dev, "init_cc_regs (%x)\n", rc);
+		return rc;
+	}
+
+	rc = cc_resume_req_queue(drvdata);
+	if (rc) {
+		dev_err(dev, "cc_resume_req_queue (%x)\n", rc);
+		return rc;
+	}
+
+	/* must be after the queue resuming as it uses the HW queue*/
+	cc_init_hash_sram(drvdata);
+
+	cc_init_iv_sram(drvdata);
+	return 0;
+}
+
+int cc_pm_get(struct device *dev)
+{
+	int rc = 0;
+	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (cc_req_queue_suspended(drvdata))
+		rc = pm_runtime_get_sync(dev);
+	else
+		pm_runtime_get_noresume(dev);
+
+	return rc;
+}
+
+int cc_pm_put_suspend(struct device *dev)
+{
+	int rc = 0;
+	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
+
+	if (!cc_req_queue_suspended(drvdata)) {
+		pm_runtime_mark_last_busy(dev);
+		rc = pm_runtime_put_autosuspend(dev);
+	} else {
+		/* Something wrong happens*/
+		dev_err(dev, "request to suspend already suspended queue");
+		rc = -EBUSY;
+	}
+	return rc;
+}
+
+int cc_pm_init(struct cc_drvdata *drvdata)
+{
+	int rc = 0;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	/* must be before the enabling to avoid resdundent suspending */
+	pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(dev);
+	/* activate the PM module */
+	rc = pm_runtime_set_active(dev);
+	if (rc)
+		return rc;
+	/* enable the PM module*/
+	pm_runtime_enable(dev);
+
+	return rc;
+}
+
+void cc_pm_fini(struct cc_drvdata *drvdata)
+{
+	pm_runtime_disable(drvdata_to_dev(drvdata));
+}
diff --git a/drivers/crypto/ccree/cc_pm.h b/drivers/crypto/ccree/cc_pm.h
new file mode 100644
index 000000000000..020a5403c58b
--- /dev/null
+++ b/drivers/crypto/ccree/cc_pm.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_pm.h
+ */
+
+#ifndef __CC_POWER_MGR_H__
+#define __CC_POWER_MGR_H__
+
+#include "cc_driver.h"
+
+#define CC_SUSPEND_TIMEOUT 3000
+
+#if defined(CONFIG_PM)
+
+extern const struct dev_pm_ops ccree_pm;
+
+int cc_pm_init(struct cc_drvdata *drvdata);
+void cc_pm_fini(struct cc_drvdata *drvdata);
+int cc_pm_suspend(struct device *dev);
+int cc_pm_resume(struct device *dev);
+int cc_pm_get(struct device *dev);
+int cc_pm_put_suspend(struct device *dev);
+
+#else
+
+static inline int cc_pm_init(struct cc_drvdata *drvdata)
+{
+	return 0;
+}
+
+static inline void cc_pm_fini(struct cc_drvdata *drvdata) {}
+
+static inline int cc_pm_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static inline int cc_pm_resume(struct device *dev)
+{
+	return 0;
+}
+
+static inline int cc_pm_get(struct device *dev)
+{
+	return 0;
+}
+
+static inline int cc_pm_put_suspend(struct device *dev)
+{
+	return 0;
+}
+
+#endif
+
+#endif /*__POWER_MGR_H__*/
diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c
new file mode 100644
index 000000000000..83a8aaae61c7
--- /dev/null
+++ b/drivers/crypto/ccree/cc_request_mgr.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include <linux/kernel.h>
+#include "cc_driver.h"
+#include "cc_buffer_mgr.h"
+#include "cc_request_mgr.h"
+#include "cc_ivgen.h"
+#include "cc_pm.h"
+
+#define CC_MAX_POLL_ITER	10
+/* The highest descriptor count in used */
+#define CC_MAX_DESC_SEQ_LEN	23
+
+struct cc_req_mgr_handle {
+	/* Request manager resources */
+	unsigned int hw_queue_size; /* HW capability */
+	unsigned int min_free_hw_slots;
+	unsigned int max_used_sw_slots;
+	struct cc_crypto_req req_queue[MAX_REQUEST_QUEUE_SIZE];
+	u32 req_queue_head;
+	u32 req_queue_tail;
+	u32 axi_completed;
+	u32 q_free_slots;
+	/* This lock protects access to HW register
+	 * that must be single request at a time
+	 */
+	spinlock_t hw_lock;
+	struct cc_hw_desc compl_desc;
+	u8 *dummy_comp_buff;
+	dma_addr_t dummy_comp_buff_dma;
+
+	/* backlog queue */
+	struct list_head backlog;
+	unsigned int bl_len;
+	spinlock_t bl_lock; /* protect backlog queue */
+
+#ifdef COMP_IN_WQ
+	struct workqueue_struct *workq;
+	struct delayed_work compwork;
+#else
+	struct tasklet_struct comptask;
+#endif
+	bool is_runtime_suspended;
+};
+
+struct cc_bl_item {
+	struct cc_crypto_req creq;
+	struct cc_hw_desc desc[CC_MAX_DESC_SEQ_LEN];
+	unsigned int len;
+	struct list_head list;
+	bool notif;
+};
+
+static void comp_handler(unsigned long devarg);
+#ifdef COMP_IN_WQ
+static void comp_work_handler(struct work_struct *work);
+#endif
+
+void cc_req_mgr_fini(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *req_mgr_h = drvdata->request_mgr_handle;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	if (!req_mgr_h)
+		return; /* Not allocated */
+
+	if (req_mgr_h->dummy_comp_buff_dma) {
+		dma_free_coherent(dev, sizeof(u32), req_mgr_h->dummy_comp_buff,
+				  req_mgr_h->dummy_comp_buff_dma);
+	}
+
+	dev_dbg(dev, "max_used_hw_slots=%d\n", (req_mgr_h->hw_queue_size -
+						req_mgr_h->min_free_hw_slots));
+	dev_dbg(dev, "max_used_sw_slots=%d\n", req_mgr_h->max_used_sw_slots);
+
+#ifdef COMP_IN_WQ
+	flush_workqueue(req_mgr_h->workq);
+	destroy_workqueue(req_mgr_h->workq);
+#else
+	/* Kill tasklet */
+	tasklet_kill(&req_mgr_h->comptask);
+#endif
+	kzfree(req_mgr_h);
+	drvdata->request_mgr_handle = NULL;
+}
+
+int cc_req_mgr_init(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *req_mgr_h;
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc = 0;
+
+	req_mgr_h = kzalloc(sizeof(*req_mgr_h), GFP_KERNEL);
+	if (!req_mgr_h) {
+		rc = -ENOMEM;
+		goto req_mgr_init_err;
+	}
+
+	drvdata->request_mgr_handle = req_mgr_h;
+
+	spin_lock_init(&req_mgr_h->hw_lock);
+	spin_lock_init(&req_mgr_h->bl_lock);
+	INIT_LIST_HEAD(&req_mgr_h->backlog);
+
+#ifdef COMP_IN_WQ
+	dev_dbg(dev, "Initializing completion workqueue\n");
+	req_mgr_h->workq = create_singlethread_workqueue("ccree");
+	if (!req_mgr_h->workq) {
+		dev_err(dev, "Failed creating work queue\n");
+		rc = -ENOMEM;
+		goto req_mgr_init_err;
+	}
+	INIT_DELAYED_WORK(&req_mgr_h->compwork, comp_work_handler);
+#else
+	dev_dbg(dev, "Initializing completion tasklet\n");
+	tasklet_init(&req_mgr_h->comptask, comp_handler,
+		     (unsigned long)drvdata);
+#endif
+	req_mgr_h->hw_queue_size = cc_ioread(drvdata,
+					     CC_REG(DSCRPTR_QUEUE_SRAM_SIZE));
+	dev_dbg(dev, "hw_queue_size=0x%08X\n", req_mgr_h->hw_queue_size);
+	if (req_mgr_h->hw_queue_size < MIN_HW_QUEUE_SIZE) {
+		dev_err(dev, "Invalid HW queue size = %u (Min. required is %u)\n",
+			req_mgr_h->hw_queue_size, MIN_HW_QUEUE_SIZE);
+		rc = -ENOMEM;
+		goto req_mgr_init_err;
+	}
+	req_mgr_h->min_free_hw_slots = req_mgr_h->hw_queue_size;
+	req_mgr_h->max_used_sw_slots = 0;
+
+	/* Allocate DMA word for "dummy" completion descriptor use */
+	req_mgr_h->dummy_comp_buff =
+		dma_alloc_coherent(dev, sizeof(u32),
+				   &req_mgr_h->dummy_comp_buff_dma,
+				   GFP_KERNEL);
+	if (!req_mgr_h->dummy_comp_buff) {
+		dev_err(dev, "Not enough memory to allocate DMA (%zu) dropped buffer\n",
+			sizeof(u32));
+		rc = -ENOMEM;
+		goto req_mgr_init_err;
+	}
+
+	/* Init. "dummy" completion descriptor */
+	hw_desc_init(&req_mgr_h->compl_desc);
+	set_din_const(&req_mgr_h->compl_desc, 0, sizeof(u32));
+	set_dout_dlli(&req_mgr_h->compl_desc, req_mgr_h->dummy_comp_buff_dma,
+		      sizeof(u32), NS_BIT, 1);
+	set_flow_mode(&req_mgr_h->compl_desc, BYPASS);
+	set_queue_last_ind(drvdata, &req_mgr_h->compl_desc);
+
+	return 0;
+
+req_mgr_init_err:
+	cc_req_mgr_fini(drvdata);
+	return rc;
+}
+
+static void enqueue_seq(struct cc_drvdata *drvdata, struct cc_hw_desc seq[],
+			unsigned int seq_len)
+{
+	int i, w;
+	void __iomem *reg = drvdata->cc_base + CC_REG(DSCRPTR_QUEUE_WORD0);
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	/*
+	 * We do indeed write all 6 command words to the same
+	 * register. The HW supports this.
+	 */
+
+	for (i = 0; i < seq_len; i++) {
+		for (w = 0; w <= 5; w++)
+			writel_relaxed(seq[i].word[w], reg);
+
+		if (cc_dump_desc)
+			dev_dbg(dev, "desc[%02d]: 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n",
+				i, seq[i].word[0], seq[i].word[1],
+				seq[i].word[2], seq[i].word[3],
+				seq[i].word[4], seq[i].word[5]);
+	}
+}
+
+/*!
+ * Completion will take place if and only if user requested completion
+ * by cc_send_sync_request().
+ *
+ * \param dev
+ * \param dx_compl_h The completion event to signal
+ */
+static void request_mgr_complete(struct device *dev, void *dx_compl_h,
+				 int dummy)
+{
+	struct completion *this_compl = dx_compl_h;
+
+	complete(this_compl);
+}
+
+static int cc_queues_status(struct cc_drvdata *drvdata,
+			    struct cc_req_mgr_handle *req_mgr_h,
+			    unsigned int total_seq_len)
+{
+	unsigned long poll_queue;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	/* SW queue is checked only once as it will not
+	 * be chaned during the poll because the spinlock_bh
+	 * is held by the thread
+	 */
+	if (((req_mgr_h->req_queue_head + 1) & (MAX_REQUEST_QUEUE_SIZE - 1)) ==
+	    req_mgr_h->req_queue_tail) {
+		dev_err(dev, "SW FIFO is full. req_queue_head=%d sw_fifo_len=%d\n",
+			req_mgr_h->req_queue_head, MAX_REQUEST_QUEUE_SIZE);
+		return -ENOSPC;
+	}
+
+	if (req_mgr_h->q_free_slots >= total_seq_len)
+		return 0;
+
+	/* Wait for space in HW queue. Poll constant num of iterations. */
+	for (poll_queue = 0; poll_queue < CC_MAX_POLL_ITER ; poll_queue++) {
+		req_mgr_h->q_free_slots =
+			cc_ioread(drvdata, CC_REG(DSCRPTR_QUEUE_CONTENT));
+		if (req_mgr_h->q_free_slots < req_mgr_h->min_free_hw_slots)
+			req_mgr_h->min_free_hw_slots = req_mgr_h->q_free_slots;
+
+		if (req_mgr_h->q_free_slots >= total_seq_len) {
+			/* If there is enough place return */
+			return 0;
+		}
+
+		dev_dbg(dev, "HW FIFO is full. q_free_slots=%d total_seq_len=%d\n",
+			req_mgr_h->q_free_slots, total_seq_len);
+	}
+	/* No room in the HW queue try again later */
+	dev_dbg(dev, "HW FIFO full, timeout. req_queue_head=%d sw_fifo_len=%d q_free_slots=%d total_seq_len=%d\n",
+		req_mgr_h->req_queue_head, MAX_REQUEST_QUEUE_SIZE,
+		req_mgr_h->q_free_slots, total_seq_len);
+	return -ENOSPC;
+}
+
+/*!
+ * Enqueue caller request to crypto hardware.
+ * Need to be called with HW lock held and PM running
+ *
+ * \param drvdata
+ * \param cc_req The request to enqueue
+ * \param desc The crypto sequence
+ * \param len The crypto sequence length
+ * \param add_comp If "true": add an artificial dout DMA to mark completion
+ *
+ * \return int Returns -EINPROGRESS or error code
+ */
+static int cc_do_send_request(struct cc_drvdata *drvdata,
+			      struct cc_crypto_req *cc_req,
+			      struct cc_hw_desc *desc, unsigned int len,
+				bool add_comp, bool ivgen)
+{
+	struct cc_req_mgr_handle *req_mgr_h = drvdata->request_mgr_handle;
+	unsigned int used_sw_slots;
+	unsigned int iv_seq_len = 0;
+	unsigned int total_seq_len = len; /*initial sequence length*/
+	struct cc_hw_desc iv_seq[CC_IVPOOL_SEQ_LEN];
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc;
+
+	if (ivgen) {
+		dev_dbg(dev, "Acquire IV from pool into %d DMA addresses %pad, %pad, %pad, IV-size=%u\n",
+			cc_req->ivgen_dma_addr_len,
+			&cc_req->ivgen_dma_addr[0],
+			&cc_req->ivgen_dma_addr[1],
+			&cc_req->ivgen_dma_addr[2],
+			cc_req->ivgen_size);
+
+		/* Acquire IV from pool */
+		rc = cc_get_iv(drvdata, cc_req->ivgen_dma_addr,
+			       cc_req->ivgen_dma_addr_len,
+			       cc_req->ivgen_size, iv_seq, &iv_seq_len);
+
+		if (rc) {
+			dev_err(dev, "Failed to generate IV (rc=%d)\n", rc);
+			return rc;
+		}
+
+		total_seq_len += iv_seq_len;
+	}
+
+	used_sw_slots = ((req_mgr_h->req_queue_head -
+			  req_mgr_h->req_queue_tail) &
+			 (MAX_REQUEST_QUEUE_SIZE - 1));
+	if (used_sw_slots > req_mgr_h->max_used_sw_slots)
+		req_mgr_h->max_used_sw_slots = used_sw_slots;
+
+	/* Enqueue request - must be locked with HW lock*/
+	req_mgr_h->req_queue[req_mgr_h->req_queue_head] = *cc_req;
+	req_mgr_h->req_queue_head = (req_mgr_h->req_queue_head + 1) &
+				    (MAX_REQUEST_QUEUE_SIZE - 1);
+	/* TODO: Use circ_buf.h ? */
+
+	dev_dbg(dev, "Enqueue request head=%u\n", req_mgr_h->req_queue_head);
+
+	/*
+	 * We are about to push command to the HW via the command registers
+	 * that may refernece hsot memory. We need to issue a memory barrier
+	 * to make sure there are no outstnading memory writes
+	 */
+	wmb();
+
+	/* STAT_PHASE_4: Push sequence */
+	if (ivgen)
+		enqueue_seq(drvdata, iv_seq, iv_seq_len);
+
+	enqueue_seq(drvdata, desc, len);
+
+	if (add_comp) {
+		enqueue_seq(drvdata, &req_mgr_h->compl_desc, 1);
+		total_seq_len++;
+	}
+
+	if (req_mgr_h->q_free_slots < total_seq_len) {
+		/* This situation should never occur. Maybe indicating problem
+		 * with resuming power. Set the free slot count to 0 and hope
+		 * for the best.
+		 */
+		dev_err(dev, "HW free slot count mismatch.");
+		req_mgr_h->q_free_slots = 0;
+	} else {
+		/* Update the free slots in HW queue */
+		req_mgr_h->q_free_slots -= total_seq_len;
+	}
+
+	/* Operation still in process */
+	return -EINPROGRESS;
+}
+
+static void cc_enqueue_backlog(struct cc_drvdata *drvdata,
+			       struct cc_bl_item *bli)
+{
+	struct cc_req_mgr_handle *mgr = drvdata->request_mgr_handle;
+
+	spin_lock_bh(&mgr->bl_lock);
+	list_add_tail(&bli->list, &mgr->backlog);
+	++mgr->bl_len;
+	spin_unlock_bh(&mgr->bl_lock);
+	tasklet_schedule(&mgr->comptask);
+}
+
+static void cc_proc_backlog(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *mgr = drvdata->request_mgr_handle;
+	struct cc_bl_item *bli;
+	struct cc_crypto_req *creq;
+	struct crypto_async_request *req;
+	bool ivgen;
+	unsigned int total_len;
+	struct device *dev = drvdata_to_dev(drvdata);
+	int rc;
+
+	spin_lock(&mgr->bl_lock);
+
+	while (mgr->bl_len) {
+		bli = list_first_entry(&mgr->backlog, struct cc_bl_item, list);
+		spin_unlock(&mgr->bl_lock);
+
+		creq = &bli->creq;
+		req = (struct crypto_async_request *)creq->user_arg;
+
+		/*
+		 * Notify the request we're moving out of the backlog
+		 * but only if we haven't done so already.
+		 */
+		if (!bli->notif) {
+			req->complete(req, -EINPROGRESS);
+			bli->notif = true;
+		}
+
+		ivgen = !!creq->ivgen_dma_addr_len;
+		total_len = bli->len + (ivgen ? CC_IVPOOL_SEQ_LEN : 0);
+
+		spin_lock(&mgr->hw_lock);
+
+		rc = cc_queues_status(drvdata, mgr, total_len);
+		if (rc) {
+			/*
+			 * There is still not room in the FIFO for
+			 * this request. Bail out. We'll return here
+			 * on the next completion irq.
+			 */
+			spin_unlock(&mgr->hw_lock);
+			return;
+		}
+
+		rc = cc_do_send_request(drvdata, &bli->creq, bli->desc,
+					bli->len, false, ivgen);
+
+		spin_unlock(&mgr->hw_lock);
+
+		if (rc != -EINPROGRESS) {
+			cc_pm_put_suspend(dev);
+			creq->user_cb(dev, req, rc);
+		}
+
+		/* Remove ourselves from the backlog list */
+		spin_lock(&mgr->bl_lock);
+		list_del(&bli->list);
+		--mgr->bl_len;
+	}
+
+	spin_unlock(&mgr->bl_lock);
+}
+
+int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req,
+		    struct cc_hw_desc *desc, unsigned int len,
+		    struct crypto_async_request *req)
+{
+	int rc;
+	struct cc_req_mgr_handle *mgr = drvdata->request_mgr_handle;
+	bool ivgen = !!cc_req->ivgen_dma_addr_len;
+	unsigned int total_len = len + (ivgen ? CC_IVPOOL_SEQ_LEN : 0);
+	struct device *dev = drvdata_to_dev(drvdata);
+	bool backlog_ok = req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG;
+	gfp_t flags = cc_gfp_flags(req);
+	struct cc_bl_item *bli;
+
+	rc = cc_pm_get(dev);
+	if (rc) {
+		dev_err(dev, "ssi_power_mgr_runtime_get returned %x\n", rc);
+		return rc;
+	}
+
+	spin_lock_bh(&mgr->hw_lock);
+	rc = cc_queues_status(drvdata, mgr, total_len);
+
+#ifdef CC_DEBUG_FORCE_BACKLOG
+	if (backlog_ok)
+		rc = -ENOSPC;
+#endif /* CC_DEBUG_FORCE_BACKLOG */
+
+	if (rc == -ENOSPC && backlog_ok) {
+		spin_unlock_bh(&mgr->hw_lock);
+
+		bli = kmalloc(sizeof(*bli), flags);
+		if (!bli) {
+			cc_pm_put_suspend(dev);
+			return -ENOMEM;
+		}
+
+		memcpy(&bli->creq, cc_req, sizeof(*cc_req));
+		memcpy(&bli->desc, desc, len * sizeof(*desc));
+		bli->len = len;
+		bli->notif = false;
+		cc_enqueue_backlog(drvdata, bli);
+		return -EBUSY;
+	}
+
+	if (!rc)
+		rc = cc_do_send_request(drvdata, cc_req, desc, len, false,
+					ivgen);
+
+	spin_unlock_bh(&mgr->hw_lock);
+	return rc;
+}
+
+int cc_send_sync_request(struct cc_drvdata *drvdata,
+			 struct cc_crypto_req *cc_req, struct cc_hw_desc *desc,
+			 unsigned int len)
+{
+	int rc;
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct cc_req_mgr_handle *mgr = drvdata->request_mgr_handle;
+
+	init_completion(&cc_req->seq_compl);
+	cc_req->user_cb = request_mgr_complete;
+	cc_req->user_arg = &cc_req->seq_compl;
+
+	rc = cc_pm_get(dev);
+	if (rc) {
+		dev_err(dev, "ssi_power_mgr_runtime_get returned %x\n", rc);
+		return rc;
+	}
+
+	while (true) {
+		spin_lock_bh(&mgr->hw_lock);
+		rc = cc_queues_status(drvdata, mgr, len + 1);
+
+		if (!rc)
+			break;
+
+		spin_unlock_bh(&mgr->hw_lock);
+		if (rc != -EAGAIN) {
+			cc_pm_put_suspend(dev);
+			return rc;
+		}
+		wait_for_completion_interruptible(&drvdata->hw_queue_avail);
+		reinit_completion(&drvdata->hw_queue_avail);
+	}
+
+	rc = cc_do_send_request(drvdata, cc_req, desc, len, true, false);
+	spin_unlock_bh(&mgr->hw_lock);
+
+	if (rc != -EINPROGRESS) {
+		cc_pm_put_suspend(dev);
+		return rc;
+	}
+
+	wait_for_completion(&cc_req->seq_compl);
+	return 0;
+}
+
+/*!
+ * Enqueue caller request to crypto hardware during init process.
+ * assume this function is not called in middle of a flow,
+ * since we set QUEUE_LAST_IND flag in the last descriptor.
+ *
+ * \param drvdata
+ * \param desc The crypto sequence
+ * \param len The crypto sequence length
+ *
+ * \return int Returns "0" upon success
+ */
+int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc,
+		      unsigned int len)
+{
+	struct cc_req_mgr_handle *req_mgr_h = drvdata->request_mgr_handle;
+	unsigned int total_seq_len = len; /*initial sequence length*/
+	int rc = 0;
+
+	/* Wait for space in HW and SW FIFO. Poll for as much as FIFO_TIMEOUT.
+	 */
+	rc = cc_queues_status(drvdata, req_mgr_h, total_seq_len);
+	if (rc)
+		return rc;
+
+	set_queue_last_ind(drvdata, &desc[(len - 1)]);
+
+	/*
+	 * We are about to push command to the HW via the command registers
+	 * that may refernece hsot memory. We need to issue a memory barrier
+	 * to make sure there are no outstnading memory writes
+	 */
+	wmb();
+	enqueue_seq(drvdata, desc, len);
+
+	/* Update the free slots in HW queue */
+	req_mgr_h->q_free_slots =
+		cc_ioread(drvdata, CC_REG(DSCRPTR_QUEUE_CONTENT));
+
+	return 0;
+}
+
+void complete_request(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *request_mgr_handle =
+						drvdata->request_mgr_handle;
+
+	complete(&drvdata->hw_queue_avail);
+#ifdef COMP_IN_WQ
+	queue_delayed_work(request_mgr_handle->workq,
+			   &request_mgr_handle->compwork, 0);
+#else
+	tasklet_schedule(&request_mgr_handle->comptask);
+#endif
+}
+
+#ifdef COMP_IN_WQ
+static void comp_work_handler(struct work_struct *work)
+{
+	struct cc_drvdata *drvdata =
+		container_of(work, struct cc_drvdata, compwork.work);
+
+	comp_handler((unsigned long)drvdata);
+}
+#endif
+
+static void proc_completions(struct cc_drvdata *drvdata)
+{
+	struct cc_crypto_req *cc_req;
+	struct device *dev = drvdata_to_dev(drvdata);
+	struct cc_req_mgr_handle *request_mgr_handle =
+						drvdata->request_mgr_handle;
+	unsigned int *tail = &request_mgr_handle->req_queue_tail;
+	unsigned int *head = &request_mgr_handle->req_queue_head;
+
+	while (request_mgr_handle->axi_completed) {
+		request_mgr_handle->axi_completed--;
+
+		/* Dequeue request */
+		if (*head == *tail) {
+			/* We are supposed to handle a completion but our
+			 * queue is empty. This is not normal. Return and
+			 * hope for the best.
+			 */
+			dev_err(dev, "Request queue is empty head == tail %u\n",
+				*head);
+			break;
+		}
+
+		cc_req = &request_mgr_handle->req_queue[*tail];
+
+		if (cc_req->user_cb)
+			cc_req->user_cb(dev, cc_req->user_arg, 0);
+		*tail = (*tail + 1) & (MAX_REQUEST_QUEUE_SIZE - 1);
+		dev_dbg(dev, "Dequeue request tail=%u\n", *tail);
+		dev_dbg(dev, "Request completed. axi_completed=%d\n",
+			request_mgr_handle->axi_completed);
+		cc_pm_put_suspend(dev);
+	}
+}
+
+static inline u32 cc_axi_comp_count(struct cc_drvdata *drvdata)
+{
+	return FIELD_GET(AXIM_MON_COMP_VALUE,
+			 cc_ioread(drvdata, drvdata->axim_mon_offset));
+}
+
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void comp_handler(unsigned long devarg)
+{
+	struct cc_drvdata *drvdata = (struct cc_drvdata *)devarg;
+	struct cc_req_mgr_handle *request_mgr_handle =
+						drvdata->request_mgr_handle;
+
+	u32 irq;
+
+	irq = (drvdata->irq & CC_COMP_IRQ_MASK);
+
+	if (irq & CC_COMP_IRQ_MASK) {
+		/* To avoid the interrupt from firing as we unmask it,
+		 * we clear it now
+		 */
+		cc_iowrite(drvdata, CC_REG(HOST_ICR), CC_COMP_IRQ_MASK);
+
+		/* Avoid race with above clear: Test completion counter
+		 * once more
+		 */
+		request_mgr_handle->axi_completed +=
+				cc_axi_comp_count(drvdata);
+
+		while (request_mgr_handle->axi_completed) {
+			do {
+				proc_completions(drvdata);
+				/* At this point (after proc_completions()),
+				 * request_mgr_handle->axi_completed is 0.
+				 */
+				request_mgr_handle->axi_completed =
+						cc_axi_comp_count(drvdata);
+			} while (request_mgr_handle->axi_completed > 0);
+
+			cc_iowrite(drvdata, CC_REG(HOST_ICR),
+				   CC_COMP_IRQ_MASK);
+
+			request_mgr_handle->axi_completed +=
+					cc_axi_comp_count(drvdata);
+		}
+	}
+	/* after verifing that there is nothing to do,
+	 * unmask AXI completion interrupt
+	 */
+	cc_iowrite(drvdata, CC_REG(HOST_IMR),
+		   cc_ioread(drvdata, CC_REG(HOST_IMR)) & ~irq);
+
+	cc_proc_backlog(drvdata);
+}
+
+/*
+ * resume the queue configuration - no need to take the lock as this happens
+ * inside the spin lock protection
+ */
+#if defined(CONFIG_PM)
+int cc_resume_req_queue(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *request_mgr_handle =
+		drvdata->request_mgr_handle;
+
+	spin_lock_bh(&request_mgr_handle->hw_lock);
+	request_mgr_handle->is_runtime_suspended = false;
+	spin_unlock_bh(&request_mgr_handle->hw_lock);
+
+	return 0;
+}
+
+/*
+ * suspend the queue configuration. Since it is used for the runtime suspend
+ * only verify that the queue can be suspended.
+ */
+int cc_suspend_req_queue(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *request_mgr_handle =
+						drvdata->request_mgr_handle;
+
+	/* lock the send_request */
+	spin_lock_bh(&request_mgr_handle->hw_lock);
+	if (request_mgr_handle->req_queue_head !=
+	    request_mgr_handle->req_queue_tail) {
+		spin_unlock_bh(&request_mgr_handle->hw_lock);
+		return -EBUSY;
+	}
+	request_mgr_handle->is_runtime_suspended = true;
+	spin_unlock_bh(&request_mgr_handle->hw_lock);
+
+	return 0;
+}
+
+bool cc_req_queue_suspended(struct cc_drvdata *drvdata)
+{
+	struct cc_req_mgr_handle *request_mgr_handle =
+						drvdata->request_mgr_handle;
+
+	return	request_mgr_handle->is_runtime_suspended;
+}
+
+#endif
diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h
new file mode 100644
index 000000000000..573cb97af085
--- /dev/null
+++ b/drivers/crypto/ccree/cc_request_mgr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+/* \file cc_request_mgr.h
+ * Request Manager
+ */
+
+#ifndef __REQUEST_MGR_H__
+#define __REQUEST_MGR_H__
+
+#include "cc_hw_queue_defs.h"
+
+int cc_req_mgr_init(struct cc_drvdata *drvdata);
+
+/*!
+ * Enqueue caller request to crypto hardware.
+ *
+ * \param drvdata
+ * \param cc_req The request to enqueue
+ * \param desc The crypto sequence
+ * \param len The crypto sequence length
+ * \param is_dout If "true": completion is handled by the caller
+ *	  If "false": this function adds a dummy descriptor completion
+ *	  and waits upon completion signal.
+ *
+ * \return int Returns -EINPROGRESS or error
+ */
+int cc_send_request(struct cc_drvdata *drvdata, struct cc_crypto_req *cc_req,
+		    struct cc_hw_desc *desc, unsigned int len,
+		    struct crypto_async_request *req);
+
+int cc_send_sync_request(struct cc_drvdata *drvdata,
+			 struct cc_crypto_req *cc_req, struct cc_hw_desc *desc,
+			 unsigned int len);
+
+int send_request_init(struct cc_drvdata *drvdata, struct cc_hw_desc *desc,
+		      unsigned int len);
+
+void complete_request(struct cc_drvdata *drvdata);
+
+void cc_req_mgr_fini(struct cc_drvdata *drvdata);
+
+#if defined(CONFIG_PM)
+int cc_resume_req_queue(struct cc_drvdata *drvdata);
+
+int cc_suspend_req_queue(struct cc_drvdata *drvdata);
+
+bool cc_req_queue_suspended(struct cc_drvdata *drvdata);
+#endif
+
+#endif /*__REQUEST_MGR_H__*/
diff --git a/drivers/crypto/ccree/cc_sram_mgr.c b/drivers/crypto/ccree/cc_sram_mgr.c
new file mode 100644
index 000000000000..c8c276f6dee9
--- /dev/null
+++ b/drivers/crypto/ccree/cc_sram_mgr.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#include "cc_driver.h"
+#include "cc_sram_mgr.h"
+
+/**
+ * struct cc_sram_ctx -Internal RAM context manager
+ * @sram_free_offset:   the offset to the non-allocated area
+ */
+struct cc_sram_ctx {
+	cc_sram_addr_t sram_free_offset;
+};
+
+/**
+ * cc_sram_mgr_fini() - Cleanup SRAM pool.
+ *
+ * @drvdata: Associated device driver context
+ */
+void cc_sram_mgr_fini(struct cc_drvdata *drvdata)
+{
+	/* Free "this" context */
+	kfree(drvdata->sram_mgr_handle);
+}
+
+/**
+ * cc_sram_mgr_init() - Initializes SRAM pool.
+ *      The pool starts right at the beginning of SRAM.
+ *      Returns zero for success, negative value otherwise.
+ *
+ * @drvdata: Associated device driver context
+ */
+int cc_sram_mgr_init(struct cc_drvdata *drvdata)
+{
+	struct cc_sram_ctx *ctx;
+	dma_addr_t start = 0;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	if (drvdata->hw_rev < CC_HW_REV_712) {
+		/* Pool starts after ROM bytes */
+		start = (dma_addr_t)cc_ioread(drvdata,
+					      CC_REG(HOST_SEP_SRAM_THRESHOLD));
+
+		if ((start & 0x3) != 0) {
+			dev_err(dev, "Invalid SRAM offset %pad\n", &start);
+			return -EINVAL;
+		}
+	}
+
+	/* Allocate "this" context */
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->sram_free_offset = start;
+	drvdata->sram_mgr_handle = ctx;
+
+	return 0;
+}
+
+/*!
+ * Allocated buffer from SRAM pool.
+ * Note: Caller is responsible to free the LAST allocated buffer.
+ * This function does not taking care of any fragmentation may occur
+ * by the order of calls to alloc/free.
+ *
+ * \param drvdata
+ * \param size The requested bytes to allocate
+ */
+cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size)
+{
+	struct cc_sram_ctx *smgr_ctx = drvdata->sram_mgr_handle;
+	struct device *dev = drvdata_to_dev(drvdata);
+	cc_sram_addr_t p;
+
+	if ((size & 0x3)) {
+		dev_err(dev, "Requested buffer size (%u) is not multiple of 4",
+			size);
+		return NULL_SRAM_ADDR;
+	}
+	if (size > (CC_CC_SRAM_SIZE - smgr_ctx->sram_free_offset)) {
+		dev_err(dev, "Not enough space to allocate %u B (at offset %llu)\n",
+			size, smgr_ctx->sram_free_offset);
+		return NULL_SRAM_ADDR;
+	}
+
+	p = smgr_ctx->sram_free_offset;
+	smgr_ctx->sram_free_offset += size;
+	dev_dbg(dev, "Allocated %u B @ %u\n", size, (unsigned int)p);
+	return p;
+}
+
+/**
+ * cc_set_sram_desc() - Create const descriptors sequence to
+ *	set values in given array into SRAM.
+ * Note: each const value can't exceed word size.
+ *
+ * @src:	  A pointer to array of words to set as consts.
+ * @dst:	  The target SRAM buffer to set into
+ * @nelements:	  The number of words in "src" array
+ * @seq:	  A pointer to the given IN/OUT descriptor sequence
+ * @seq_len:	  A pointer to the given IN/OUT sequence length
+ */
+void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst,
+		      unsigned int nelement, struct cc_hw_desc *seq,
+		      unsigned int *seq_len)
+{
+	u32 i;
+	unsigned int idx = *seq_len;
+
+	for (i = 0; i < nelement; i++, idx++) {
+		hw_desc_init(&seq[idx]);
+		set_din_const(&seq[idx], src[i], sizeof(u32));
+		set_dout_sram(&seq[idx], dst + (i * sizeof(u32)), sizeof(u32));
+		set_flow_mode(&seq[idx], BYPASS);
+	}
+
+	*seq_len = idx;
+}
diff --git a/drivers/crypto/ccree/cc_sram_mgr.h b/drivers/crypto/ccree/cc_sram_mgr.h
new file mode 100644
index 000000000000..d48649fb3323
--- /dev/null
+++ b/drivers/crypto/ccree/cc_sram_mgr.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2012-2018 ARM Limited or its affiliates. */
+
+#ifndef __CC_SRAM_MGR_H__
+#define __CC_SRAM_MGR_H__
+
+#ifndef CC_CC_SRAM_SIZE
+#define CC_CC_SRAM_SIZE 4096
+#endif
+
+struct cc_drvdata;
+
+/**
+ * Address (offset) within CC internal SRAM
+ */
+
+typedef u64 cc_sram_addr_t;
+
+#define NULL_SRAM_ADDR ((cc_sram_addr_t)-1)
+
+/*!
+ * Initializes SRAM pool.
+ * The first X bytes of SRAM are reserved for ROM usage, hence, pool
+ * starts right after X bytes.
+ *
+ * \param drvdata
+ *
+ * \return int Zero for success, negative value otherwise.
+ */
+int cc_sram_mgr_init(struct cc_drvdata *drvdata);
+
+/*!
+ * Uninits SRAM pool.
+ *
+ * \param drvdata
+ */
+void cc_sram_mgr_fini(struct cc_drvdata *drvdata);
+
+/*!
+ * Allocated buffer from SRAM pool.
+ * Note: Caller is responsible to free the LAST allocated buffer.
+ * This function does not taking care of any fragmentation may occur
+ * by the order of calls to alloc/free.
+ *
+ * \param drvdata
+ * \param size The requested bytes to allocate
+ */
+cc_sram_addr_t cc_sram_alloc(struct cc_drvdata *drvdata, u32 size);
+
+/**
+ * cc_set_sram_desc() - Create const descriptors sequence to
+ *	set values in given array into SRAM.
+ * Note: each const value can't exceed word size.
+ *
+ * @src:	  A pointer to array of words to set as consts.
+ * @dst:	  The target SRAM buffer to set into
+ * @nelements:	  The number of words in "src" array
+ * @seq:	  A pointer to the given IN/OUT descriptor sequence
+ * @seq_len:	  A pointer to the given IN/OUT sequence length
+ */
+void cc_set_sram_desc(const u32 *src, cc_sram_addr_t dst,
+		      unsigned int nelement, struct cc_hw_desc *seq,
+		      unsigned int *seq_len);
+
+#endif /*__CC_SRAM_MGR_H__*/
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 34a02d690548..59fe6631e73e 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -131,6 +131,11 @@ static inline int is_ofld_imm(const struct sk_buff *skb)
 	return (skb->len <= SGE_MAX_WR_LEN);
 }
 
+static inline void chcr_init_hctx_per_wr(struct chcr_ahash_req_ctx *reqctx)
+{
+	memset(&reqctx->hctx_wr, 0, sizeof(struct chcr_hctx_per_wr));
+}
+
 static int sg_nents_xlen(struct scatterlist *sg, unsigned int reqlen,
 			 unsigned int entlen,
 			 unsigned int skip)
@@ -160,41 +165,6 @@ static int sg_nents_xlen(struct scatterlist *sg, unsigned int reqlen,
 	return nents;
 }
 
-static inline void chcr_handle_ahash_resp(struct ahash_request *req,
-					  unsigned char *input,
-					  int err)
-{
-	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
-	int digestsize, updated_digestsize;
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm));
-
-	if (input == NULL)
-		goto out;
-	digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
-	if (reqctx->is_sg_map)
-		chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
-	if (reqctx->dma_addr)
-		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->dma_addr,
-				 reqctx->dma_len, DMA_TO_DEVICE);
-	reqctx->dma_addr = 0;
-	updated_digestsize = digestsize;
-	if (digestsize == SHA224_DIGEST_SIZE)
-		updated_digestsize = SHA256_DIGEST_SIZE;
-	else if (digestsize == SHA384_DIGEST_SIZE)
-		updated_digestsize = SHA512_DIGEST_SIZE;
-	if (reqctx->result == 1) {
-		reqctx->result = 0;
-		memcpy(req->result, input + sizeof(struct cpl_fw6_pld),
-		       digestsize);
-	} else {
-		memcpy(reqctx->partial_hash, input + sizeof(struct cpl_fw6_pld),
-		       updated_digestsize);
-	}
-out:
-	req->base.complete(&req->base, err);
-}
-
 static inline int get_aead_subtype(struct crypto_aead *aead)
 {
 	struct aead_alg *alg = crypto_aead_alg(aead);
@@ -247,34 +217,6 @@ static inline void chcr_handle_aead_resp(struct aead_request *req,
 	req->base.complete(&req->base, err);
 }
 
-/*
- *	chcr_handle_resp - Unmap the DMA buffers associated with the request
- *	@req: crypto request
- */
-int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
-			 int err)
-{
-	struct crypto_tfm *tfm = req->tfm;
-	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
-	struct adapter *adap = padap(ctx->dev);
-
-	switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
-	case CRYPTO_ALG_TYPE_AEAD:
-		chcr_handle_aead_resp(aead_request_cast(req), input, err);
-		break;
-
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		 err = chcr_handle_cipher_resp(ablkcipher_request_cast(req),
-					       input, err);
-		break;
-
-	case CRYPTO_ALG_TYPE_AHASH:
-		chcr_handle_ahash_resp(ahash_request_cast(req), input, err);
-		}
-	atomic_inc(&adap->chcr_stats.complete);
-	return err;
-}
-
 static void get_aes_decrypt_key(unsigned char *dec_key,
 				       const unsigned char *key,
 				       unsigned int keylength)
@@ -563,7 +505,6 @@ static void  ulptx_walk_add_sg(struct ulptx_walk *walk,
 
 	if (!len)
 		return;
-
 	while (sg && skip) {
 		if (sg_dma_len(sg) <= skip) {
 			skip -= sg_dma_len(sg);
@@ -653,6 +594,35 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
 	}
 	return 0;
 }
+
+static int chcr_hash_ent_in_wr(struct scatterlist *src,
+			     unsigned int minsg,
+			     unsigned int space,
+			     unsigned int srcskip)
+{
+	int srclen = 0;
+	int srcsg = minsg;
+	int soffset = 0, sless;
+
+	if (sg_dma_len(src) == srcskip) {
+		src = sg_next(src);
+		srcskip = 0;
+	}
+	while (src && space > (sgl_ent_len[srcsg + 1])) {
+		sless = min_t(unsigned int, sg_dma_len(src) - soffset -	srcskip,
+							CHCR_SRC_SG_SIZE);
+		srclen += sless;
+		soffset += sless;
+		srcsg++;
+		if (sg_dma_len(src) == (soffset + srcskip)) {
+			src = sg_next(src);
+			soffset = 0;
+			srcskip = 0;
+		}
+	}
+	return srclen;
+}
+
 static int chcr_sg_ent_in_wr(struct scatterlist *src,
 			     struct scatterlist *dst,
 			     unsigned int minsg,
@@ -662,7 +632,7 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 {
 	int srclen = 0, dstlen = 0;
 	int srcsg = minsg, dstsg = minsg;
-	int offset = 0, less;
+	int offset = 0, soffset = 0, less, sless = 0;
 
 	if (sg_dma_len(src) == srcskip) {
 		src = sg_next(src);
@@ -676,7 +646,9 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 
 	while (src && dst &&
 	       space > (sgl_ent_len[srcsg + 1] + dsgl_ent_len[dstsg])) {
-		srclen += (sg_dma_len(src) - srcskip);
+		sless = min_t(unsigned int, sg_dma_len(src) - srcskip - soffset,
+				CHCR_SRC_SG_SIZE);
+		srclen += sless;
 		srcsg++;
 		offset = 0;
 		while (dst && ((dstsg + 1) <= MAX_DSGL_ENT) &&
@@ -687,15 +659,20 @@ static int chcr_sg_ent_in_wr(struct scatterlist *src,
 				     dstskip, CHCR_DST_SG_SIZE);
 			dstlen += less;
 			offset += less;
-			if (offset == sg_dma_len(dst)) {
+			if ((offset + dstskip) == sg_dma_len(dst)) {
 				dst = sg_next(dst);
 				offset = 0;
 			}
 			dstsg++;
 			dstskip = 0;
 		}
-		src = sg_next(src);
-		srcskip = 0;
+		soffset += sless;
+		if ((soffset + srcskip) == sg_dma_len(src)) {
+			src = sg_next(src);
+			srcskip = 0;
+			soffset = 0;
+		}
+
 	}
 	return min(srclen, dstlen);
 }
@@ -784,14 +761,14 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 	nents = sg_nents_xlen(reqctx->dstsg,  wrparam->bytes, CHCR_DST_SG_SIZE,
 			      reqctx->dst_ofst);
 	dst_size = get_space_for_phys_dsgl(nents + 1);
-	kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+	kctx_len = roundup(ablkctx->enckey_len, 16);
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
 	nents = sg_nents_xlen(reqctx->srcsg, wrparam->bytes,
 				  CHCR_SRC_SG_SIZE, reqctx->src_ofst);
-	temp = reqctx->imm ? (DIV_ROUND_UP((IV + wrparam->req->nbytes), 16)
-			      * 16) : (sgl_len(nents + MIN_CIPHER_SG) * 8);
+	temp = reqctx->imm ? roundup(IV + wrparam->req->nbytes, 16) :
+				     (sgl_len(nents + MIN_CIPHER_SG) * 8);
 	transhdr_len += temp;
-	transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+	transhdr_len = roundup(transhdr_len, 16);
 	skb = alloc_skb(SGE_MAX_WR_LEN, flags);
 	if (!skb) {
 		error = -ENOMEM;
@@ -847,6 +824,13 @@ static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 		    transhdr_len, temp,
 			ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC);
 	reqctx->skb = skb;
+
+	if (reqctx->op && (ablkctx->ciph_mode ==
+			   CHCR_SCMD_CIPHER_MODE_AES_CBC))
+		sg_pcopy_to_buffer(wrparam->req->src,
+			sg_nents(wrparam->req->src), wrparam->req->info, 16,
+			reqctx->processed + wrparam->bytes - AES_BLOCK_SIZE);
+
 	return skb;
 err:
 	return ERR_PTR(error);
@@ -1070,9 +1054,8 @@ static int chcr_update_cipher_iv(struct ablkcipher_request *req,
 		ret = chcr_update_tweak(req, iv, 0);
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
 		if (reqctx->op)
-			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
-					   16,
-					   reqctx->processed - AES_BLOCK_SIZE);
+			/*Updated before sending last WR*/
+			memcpy(iv, req->info, AES_BLOCK_SIZE);
 		else
 			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
 	}
@@ -1100,11 +1083,8 @@ static int chcr_final_cipher_iv(struct ablkcipher_request *req,
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
 		ret = chcr_update_tweak(req, iv, 1);
 	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
-		if (reqctx->op)
-			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
-					   16,
-					   reqctx->processed - AES_BLOCK_SIZE);
-		else
+		/*Already updated for Decrypt*/
+		if (!reqctx->op)
 			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
 
 	}
@@ -1143,12 +1123,12 @@ static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 	}
 	if (!reqctx->imm) {
 		bytes = chcr_sg_ent_in_wr(reqctx->srcsg, reqctx->dstsg, 1,
-					  SPACE_LEFT(ablkctx->enckey_len),
+					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  reqctx->src_ofst, reqctx->dst_ofst);
 		if ((bytes + reqctx->processed) >= req->nbytes)
 			bytes  = req->nbytes - reqctx->processed;
 		else
-			bytes = ROUND_16(bytes);
+			bytes = rounddown(bytes, 16);
 	} else {
 		/*CTR mode counter overfloa*/
 		bytes  = req->nbytes - reqctx->processed;
@@ -1234,7 +1214,7 @@ static int process_cipher(struct ablkcipher_request *req,
 				       CHCR_DST_SG_SIZE, 0);
 		dnents += 1; // IV
 		phys_dsgl = get_space_for_phys_dsgl(dnents);
-		kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+		kctx_len = roundup(ablkctx->enckey_len, 16);
 		transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
 		reqctx->imm = (transhdr_len + IV + req->nbytes) <=
 			SGE_MAX_WR_LEN;
@@ -1247,12 +1227,12 @@ static int process_cipher(struct ablkcipher_request *req,
 	if (!reqctx->imm) {
 		bytes = chcr_sg_ent_in_wr(req->src, req->dst,
 					  MIN_CIPHER_SG,
-					  SPACE_LEFT(ablkctx->enckey_len),
+					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  0, 0);
 		if ((bytes + reqctx->processed) >= req->nbytes)
 			bytes  = req->nbytes - reqctx->processed;
 		else
-			bytes = ROUND_16(bytes);
+			bytes = rounddown(bytes, 16);
 	} else {
 		bytes = req->nbytes;
 	}
@@ -1282,7 +1262,7 @@ static int process_cipher(struct ablkcipher_request *req,
 					   req->src,
 					   req->dst,
 					   req->nbytes,
-					   req->info,
+					   reqctx->iv,
 					   op_type);
 		goto error;
 	}
@@ -1503,35 +1483,24 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 	struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm));
 	struct chcr_wr *chcr_req;
 	struct ulptx_sgl *ulptx;
-	unsigned int nents = 0, transhdr_len, iopad_alignment = 0;
-	unsigned int digestsize = crypto_ahash_digestsize(tfm);
-	unsigned int kctx_len = 0, temp = 0;
-	u8 hash_size_in_response = 0;
+	unsigned int nents = 0, transhdr_len;
+	unsigned int temp = 0;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
 	struct adapter *adap = padap(h_ctx(tfm)->dev);
 	int error = 0;
 
-	iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
-	kctx_len = param->alg_prm.result_size + iopad_alignment;
-	if (param->opad_needed)
-		kctx_len += param->alg_prm.result_size + iopad_alignment;
-
-	if (req_ctx->result)
-		hash_size_in_response = digestsize;
-	else
-		hash_size_in_response = param->alg_prm.result_size;
-	transhdr_len = HASH_TRANSHDR_SIZE(kctx_len);
-	req_ctx->imm = (transhdr_len + param->bfr_len + param->sg_len) <=
-		SGE_MAX_WR_LEN;
-	nents = sg_nents_xlen(req->src, param->sg_len, CHCR_SRC_SG_SIZE, 0);
+	transhdr_len = HASH_TRANSHDR_SIZE(param->kctx_len);
+	req_ctx->hctx_wr.imm = (transhdr_len + param->bfr_len +
+				param->sg_len) <= SGE_MAX_WR_LEN;
+	nents = sg_nents_xlen(req_ctx->hctx_wr.srcsg, param->sg_len,
+		      CHCR_SRC_SG_SIZE, req_ctx->hctx_wr.src_ofst);
 	nents += param->bfr_len ? 1 : 0;
-	transhdr_len += req_ctx->imm ? (DIV_ROUND_UP((param->bfr_len +
-			param->sg_len), 16) * 16) :
-			(sgl_len(nents) * 8);
-	transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+	transhdr_len += req_ctx->hctx_wr.imm ? roundup(param->bfr_len +
+				param->sg_len, 16) : (sgl_len(nents) * 8);
+	transhdr_len = roundup(transhdr_len, 16);
 
-	skb = alloc_skb(SGE_MAX_WR_LEN, flags);
+	skb = alloc_skb(transhdr_len, flags);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 	chcr_req = __skb_put_zero(skb, transhdr_len);
@@ -1563,33 +1532,33 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 	chcr_req->key_ctx.ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY,
 					    param->alg_prm.mk_size, 0,
 					    param->opad_needed,
-					    ((kctx_len +
+					    ((param->kctx_len +
 					     sizeof(chcr_req->key_ctx)) >> 4));
 	chcr_req->sec_cpl.scmd1 = cpu_to_be64((u64)param->scmd1);
-	ulptx = (struct ulptx_sgl *)((u8 *)(chcr_req + 1) + kctx_len +
+	ulptx = (struct ulptx_sgl *)((u8 *)(chcr_req + 1) + param->kctx_len +
 				     DUMMY_BYTES);
 	if (param->bfr_len != 0) {
-		req_ctx->dma_addr = dma_map_single(&u_ctx->lldi.pdev->dev,
-					  req_ctx->reqbfr, param->bfr_len,
-					  DMA_TO_DEVICE);
+		req_ctx->hctx_wr.dma_addr =
+			dma_map_single(&u_ctx->lldi.pdev->dev, req_ctx->reqbfr,
+				       param->bfr_len, DMA_TO_DEVICE);
 		if (dma_mapping_error(&u_ctx->lldi.pdev->dev,
-				       req_ctx->dma_addr)) {
+				       req_ctx->hctx_wr. dma_addr)) {
 			error = -ENOMEM;
 			goto err;
 		}
-		req_ctx->dma_len = param->bfr_len;
+		req_ctx->hctx_wr.dma_len = param->bfr_len;
 	} else {
-		req_ctx->dma_addr = 0;
+		req_ctx->hctx_wr.dma_addr = 0;
 	}
 	chcr_add_hash_src_ent(req, ulptx, param);
 	/* Request upto max wr size */
-	temp = kctx_len + DUMMY_BYTES + (req_ctx->imm ? (param->sg_len
-					+ param->bfr_len) : 0);
+	temp = param->kctx_len + DUMMY_BYTES + (req_ctx->hctx_wr.imm ?
+				(param->sg_len + param->bfr_len) : 0);
 	atomic_inc(&adap->chcr_stats.digest_rqst);
-	create_wreq(h_ctx(tfm), chcr_req, &req->base, req_ctx->imm,
-		    hash_size_in_response, transhdr_len,
+	create_wreq(h_ctx(tfm), chcr_req, &req->base, req_ctx->hctx_wr.imm,
+		    param->hash_size, transhdr_len,
 		    temp,  0);
-	req_ctx->skb = skb;
+	req_ctx->hctx_wr.skb = skb;
 	return skb;
 err:
 	kfree_skb(skb);
@@ -1608,7 +1577,6 @@ static int chcr_ahash_update(struct ahash_request *req)
 	int error;
 
 	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
-
 	u_ctx = ULD_CTX(h_ctx(rtfm));
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    h_ctx(rtfm)->tx_qidx))) {
@@ -1625,17 +1593,26 @@ static int chcr_ahash_update(struct ahash_request *req)
 		req_ctx->reqlen += nbytes;
 		return 0;
 	}
+	chcr_init_hctx_per_wr(req_ctx);
 	error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
 	if (error)
 		return -ENOMEM;
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	params.kctx_len = roundup(params.alg_prm.result_size, 16);
+	params.sg_len = chcr_hash_ent_in_wr(req->src, !!req_ctx->reqlen,
+				     HASH_SPACE_LEFT(params.kctx_len), 0);
+	if (params.sg_len > req->nbytes)
+		params.sg_len = req->nbytes;
+	params.sg_len = rounddown(params.sg_len + req_ctx->reqlen, bs) -
+			req_ctx->reqlen;
 	params.opad_needed = 0;
 	params.more = 1;
 	params.last = 0;
-	params.sg_len = nbytes - req_ctx->reqlen;
 	params.bfr_len = req_ctx->reqlen;
 	params.scmd1 = 0;
-	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
-	req_ctx->result = 0;
+	req_ctx->hctx_wr.srcsg = req->src;
+
+	params.hash_size = params.alg_prm.result_size;
 	req_ctx->data_len += params.sg_len + params.bfr_len;
 	skb = create_hash_wr(req, &params);
 	if (IS_ERR(skb)) {
@@ -1643,6 +1620,7 @@ static int chcr_ahash_update(struct ahash_request *req)
 		goto unmap;
 	}
 
+	req_ctx->hctx_wr.processed += params.sg_len;
 	if (remainder) {
 		/* Swap buffers */
 		swap(req_ctx->reqbfr, req_ctx->skbfr);
@@ -1680,16 +1658,27 @@ static int chcr_ahash_final(struct ahash_request *req)
 	struct uld_ctx *u_ctx = NULL;
 	u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
 
+	chcr_init_hctx_per_wr(req_ctx);
 	u_ctx = ULD_CTX(h_ctx(rtfm));
 	if (is_hmac(crypto_ahash_tfm(rtfm)))
 		params.opad_needed = 1;
 	else
 		params.opad_needed = 0;
 	params.sg_len = 0;
+	req_ctx->hctx_wr.isfinal = 1;
 	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
-	req_ctx->result = 1;
+	params.kctx_len = roundup(params.alg_prm.result_size, 16);
+	if (is_hmac(crypto_ahash_tfm(rtfm))) {
+		params.opad_needed = 1;
+		params.kctx_len *= 2;
+	} else {
+		params.opad_needed = 0;
+	}
+
+	req_ctx->hctx_wr.result = 1;
 	params.bfr_len = req_ctx->reqlen;
 	req_ctx->data_len += params.bfr_len + params.sg_len;
+	req_ctx->hctx_wr.srcsg = req->src;
 	if (req_ctx->reqlen == 0) {
 		create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len);
 		params.last = 0;
@@ -1702,10 +1691,11 @@ static int chcr_ahash_final(struct ahash_request *req)
 		params.last = 1;
 		params.more = 0;
 	}
+	params.hash_size = crypto_ahash_digestsize(rtfm);
 	skb = create_hash_wr(req, &params);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
-
+	req_ctx->reqlen = 0;
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
@@ -1730,37 +1720,59 @@ static int chcr_ahash_finup(struct ahash_request *req)
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
 			return -EBUSY;
 	}
+	chcr_init_hctx_per_wr(req_ctx);
+	error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
+	if (error)
+		return -ENOMEM;
 
-	if (is_hmac(crypto_ahash_tfm(rtfm)))
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	params.kctx_len = roundup(params.alg_prm.result_size, 16);
+	if (is_hmac(crypto_ahash_tfm(rtfm))) {
+		params.kctx_len *= 2;
 		params.opad_needed = 1;
-	else
+	} else {
 		params.opad_needed = 0;
+	}
 
-	params.sg_len = req->nbytes;
+	params.sg_len = chcr_hash_ent_in_wr(req->src, !!req_ctx->reqlen,
+				    HASH_SPACE_LEFT(params.kctx_len), 0);
+	if (params.sg_len < req->nbytes) {
+		if (is_hmac(crypto_ahash_tfm(rtfm))) {
+			params.kctx_len /= 2;
+			params.opad_needed = 0;
+		}
+		params.last = 0;
+		params.more = 1;
+		params.sg_len = rounddown(params.sg_len + req_ctx->reqlen, bs)
+					- req_ctx->reqlen;
+		params.hash_size = params.alg_prm.result_size;
+		params.scmd1 = 0;
+	} else {
+		params.last = 1;
+		params.more = 0;
+		params.sg_len = req->nbytes;
+		params.hash_size = crypto_ahash_digestsize(rtfm);
+		params.scmd1 = req_ctx->data_len + req_ctx->reqlen +
+				params.sg_len;
+	}
 	params.bfr_len = req_ctx->reqlen;
-	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
 	req_ctx->data_len += params.bfr_len + params.sg_len;
-	req_ctx->result = 1;
+	req_ctx->hctx_wr.result = 1;
+	req_ctx->hctx_wr.srcsg = req->src;
 	if ((req_ctx->reqlen + req->nbytes) == 0) {
 		create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len);
 		params.last = 0;
 		params.more = 1;
 		params.scmd1 = 0;
 		params.bfr_len = bs;
-	} else {
-		params.scmd1 = req_ctx->data_len;
-		params.last = 1;
-		params.more = 0;
 	}
-	error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
-	if (error)
-		return -ENOMEM;
-
 	skb = create_hash_wr(req, &params);
 	if (IS_ERR(skb)) {
 		error = PTR_ERR(skb);
 		goto unmap;
 	}
+	req_ctx->reqlen = 0;
+	req_ctx->hctx_wr.processed += params.sg_len;
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
@@ -1791,21 +1803,42 @@ static int chcr_ahash_digest(struct ahash_request *req)
 			return -EBUSY;
 	}
 
-	if (is_hmac(crypto_ahash_tfm(rtfm)))
-		params.opad_needed = 1;
-	else
-		params.opad_needed = 0;
+	chcr_init_hctx_per_wr(req_ctx);
 	error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
 	if (error)
 		return -ENOMEM;
 
-	params.last = 0;
-	params.more = 0;
-	params.sg_len = req->nbytes;
-	params.bfr_len = 0;
-	params.scmd1 = 0;
 	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
-	req_ctx->result = 1;
+	params.kctx_len = roundup(params.alg_prm.result_size, 16);
+	if (is_hmac(crypto_ahash_tfm(rtfm))) {
+		params.kctx_len *= 2;
+		params.opad_needed = 1;
+	} else {
+		params.opad_needed = 0;
+	}
+	params.sg_len = chcr_hash_ent_in_wr(req->src, !!req_ctx->reqlen,
+				HASH_SPACE_LEFT(params.kctx_len), 0);
+	if (params.sg_len < req->nbytes) {
+		if (is_hmac(crypto_ahash_tfm(rtfm))) {
+			params.kctx_len /= 2;
+			params.opad_needed = 0;
+		}
+		params.last = 0;
+		params.more = 1;
+		params.scmd1 = 0;
+		params.sg_len = rounddown(params.sg_len, bs);
+		params.hash_size = params.alg_prm.result_size;
+	} else {
+		params.sg_len = req->nbytes;
+		params.hash_size = crypto_ahash_digestsize(rtfm);
+		params.last = 1;
+		params.more = 0;
+		params.scmd1 = req->nbytes + req_ctx->data_len;
+
+	}
+	params.bfr_len = 0;
+	req_ctx->hctx_wr.result = 1;
+	req_ctx->hctx_wr.srcsg = req->src;
 	req_ctx->data_len += params.bfr_len + params.sg_len;
 
 	if (req->nbytes == 0) {
@@ -1819,6 +1852,7 @@ static int chcr_ahash_digest(struct ahash_request *req)
 		error = PTR_ERR(skb);
 		goto unmap;
 	}
+	req_ctx->hctx_wr.processed += params.sg_len;
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
@@ -1828,6 +1862,151 @@ unmap:
 	return error;
 }
 
+static int chcr_ahash_continue(struct ahash_request *req)
+{
+	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
+	struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr;
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+	struct uld_ctx *u_ctx = NULL;
+	struct sk_buff *skb;
+	struct hash_wr_param params;
+	u8  bs;
+	int error;
+
+	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+	u_ctx = ULD_CTX(h_ctx(rtfm));
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    h_ctx(rtfm)->tx_qidx))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	params.kctx_len = roundup(params.alg_prm.result_size, 16);
+	if (is_hmac(crypto_ahash_tfm(rtfm))) {
+		params.kctx_len *= 2;
+		params.opad_needed = 1;
+	} else {
+		params.opad_needed = 0;
+	}
+	params.sg_len = chcr_hash_ent_in_wr(hctx_wr->srcsg, 0,
+					    HASH_SPACE_LEFT(params.kctx_len),
+					    hctx_wr->src_ofst);
+	if ((params.sg_len + hctx_wr->processed) > req->nbytes)
+		params.sg_len = req->nbytes - hctx_wr->processed;
+	if (!hctx_wr->result ||
+	    ((params.sg_len + hctx_wr->processed) < req->nbytes)) {
+		if (is_hmac(crypto_ahash_tfm(rtfm))) {
+			params.kctx_len /= 2;
+			params.opad_needed = 0;
+		}
+		params.last = 0;
+		params.more = 1;
+		params.sg_len = rounddown(params.sg_len, bs);
+		params.hash_size = params.alg_prm.result_size;
+		params.scmd1 = 0;
+	} else {
+		params.last = 1;
+		params.more = 0;
+		params.hash_size = crypto_ahash_digestsize(rtfm);
+		params.scmd1 = reqctx->data_len + params.sg_len;
+	}
+	params.bfr_len = 0;
+	reqctx->data_len += params.sg_len;
+	skb = create_hash_wr(req, &params);
+	if (IS_ERR(skb)) {
+		error = PTR_ERR(skb);
+		goto err;
+	}
+	hctx_wr->processed += params.sg_len;
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
+	chcr_send_wr(skb);
+	return 0;
+err:
+	return error;
+}
+
+static inline void chcr_handle_ahash_resp(struct ahash_request *req,
+					  unsigned char *input,
+					  int err)
+{
+	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
+	struct chcr_hctx_per_wr *hctx_wr = &reqctx->hctx_wr;
+	int digestsize, updated_digestsize;
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct uld_ctx *u_ctx = ULD_CTX(h_ctx(tfm));
+
+	if (input == NULL)
+		goto out;
+	digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
+	updated_digestsize = digestsize;
+	if (digestsize == SHA224_DIGEST_SIZE)
+		updated_digestsize = SHA256_DIGEST_SIZE;
+	else if (digestsize == SHA384_DIGEST_SIZE)
+		updated_digestsize = SHA512_DIGEST_SIZE;
+
+	if (hctx_wr->dma_addr) {
+		dma_unmap_single(&u_ctx->lldi.pdev->dev, hctx_wr->dma_addr,
+				 hctx_wr->dma_len, DMA_TO_DEVICE);
+		hctx_wr->dma_addr = 0;
+	}
+	if (hctx_wr->isfinal || ((hctx_wr->processed + reqctx->reqlen) ==
+				 req->nbytes)) {
+		if (hctx_wr->result == 1) {
+			hctx_wr->result = 0;
+			memcpy(req->result, input + sizeof(struct cpl_fw6_pld),
+			       digestsize);
+		} else {
+			memcpy(reqctx->partial_hash,
+			       input + sizeof(struct cpl_fw6_pld),
+			       updated_digestsize);
+
+		}
+		goto unmap;
+	}
+	memcpy(reqctx->partial_hash, input + sizeof(struct cpl_fw6_pld),
+	       updated_digestsize);
+
+	err = chcr_ahash_continue(req);
+	if (err)
+		goto unmap;
+	return;
+unmap:
+	if (hctx_wr->is_sg_map)
+		chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
+
+
+out:
+	req->base.complete(&req->base, err);
+}
+
+/*
+ *	chcr_handle_resp - Unmap the DMA buffers associated with the request
+ *	@req: crypto request
+ */
+int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
+			 int err)
+{
+	struct crypto_tfm *tfm = req->tfm;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct adapter *adap = padap(ctx->dev);
+
+	switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_AEAD:
+		chcr_handle_aead_resp(aead_request_cast(req), input, err);
+		break;
+
+	case CRYPTO_ALG_TYPE_ABLKCIPHER:
+		 err = chcr_handle_cipher_resp(ablkcipher_request_cast(req),
+					       input, err);
+		break;
+
+	case CRYPTO_ALG_TYPE_AHASH:
+		chcr_handle_ahash_resp(ahash_request_cast(req), input, err);
+		}
+	atomic_inc(&adap->chcr_stats.complete);
+	return err;
+}
 static int chcr_ahash_export(struct ahash_request *areq, void *out)
 {
 	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
@@ -1835,11 +2014,10 @@ static int chcr_ahash_export(struct ahash_request *areq, void *out)
 
 	state->reqlen = req_ctx->reqlen;
 	state->data_len = req_ctx->data_len;
-	state->is_sg_map = 0;
-	state->result = 0;
 	memcpy(state->bfr1, req_ctx->reqbfr, req_ctx->reqlen);
 	memcpy(state->partial_hash, req_ctx->partial_hash,
 	       CHCR_HASH_MAX_DIGEST_SIZE);
+	chcr_init_hctx_per_wr(state);
 		return 0;
 }
 
@@ -1852,11 +2030,10 @@ static int chcr_ahash_import(struct ahash_request *areq, const void *in)
 	req_ctx->data_len = state->data_len;
 	req_ctx->reqbfr = req_ctx->bfr1;
 	req_ctx->skbfr = req_ctx->bfr2;
-	req_ctx->is_sg_map = 0;
-	req_ctx->result = 0;
 	memcpy(req_ctx->bfr1, state->bfr1, CHCR_HASH_MAX_BLOCK_SIZE_128);
 	memcpy(req_ctx->partial_hash, state->partial_hash,
 	       CHCR_HASH_MAX_DIGEST_SIZE);
+	chcr_init_hctx_per_wr(req_ctx);
 	return 0;
 }
 
@@ -1953,10 +2130,8 @@ static int chcr_sha_init(struct ahash_request *areq)
 	req_ctx->reqlen = 0;
 	req_ctx->reqbfr = req_ctx->bfr1;
 	req_ctx->skbfr = req_ctx->bfr2;
-	req_ctx->skb = NULL;
-	req_ctx->result = 0;
-	req_ctx->is_sg_map = 0;
 	copy_hash_init_values(req_ctx->partial_hash, digestsize);
+
 	return 0;
 }
 
@@ -2124,11 +2299,11 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
 	reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen) <
 			SGE_MAX_WR_LEN;
-	temp = reqctx->imm ? (DIV_ROUND_UP((assoclen + IV + req->cryptlen), 16)
-			* 16) : (sgl_len(reqctx->src_nents + reqctx->aad_nents
+	temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen, 16)
+			: (sgl_len(reqctx->src_nents + reqctx->aad_nents
 			+ MIN_GCM_SG) * 8);
 	transhdr_len += temp;
-	transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+	transhdr_len = roundup(transhdr_len, 16);
 
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE,
 				    transhdr_len, op_type)) {
@@ -2187,9 +2362,8 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 		memcpy(chcr_req->key_ctx.key, actx->dec_rrkey,
 		       aeadctx->enckey_len);
 
-	memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) <<
-					4), actx->h_iopad, kctx_len -
-				(DIV_ROUND_UP(aeadctx->enckey_len, 16) << 4));
+	memcpy(chcr_req->key_ctx.key + roundup(aeadctx->enckey_len, 16),
+	       actx->h_iopad, kctx_len - roundup(aeadctx->enckey_len, 16));
 	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
 	    subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
 		memcpy(reqctx->iv, aeadctx->nonce, CTR_RFC3686_NONCE_SIZE);
@@ -2398,22 +2572,26 @@ void chcr_add_hash_src_ent(struct ahash_request *req,
 	struct ulptx_walk ulp_walk;
 	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
 
-	if (reqctx->imm) {
+	if (reqctx->hctx_wr.imm) {
 		u8 *buf = (u8 *)ulptx;
 
 		if (param->bfr_len) {
 			memcpy(buf, reqctx->reqbfr, param->bfr_len);
 			buf += param->bfr_len;
 		}
-		sg_pcopy_to_buffer(req->src, sg_nents(req->src),
-				   buf, param->sg_len, 0);
+
+		sg_pcopy_to_buffer(reqctx->hctx_wr.srcsg,
+				   sg_nents(reqctx->hctx_wr.srcsg), buf,
+				   param->sg_len, 0);
 	} else {
 		ulptx_walk_init(&ulp_walk, ulptx);
 		if (param->bfr_len)
 			ulptx_walk_add_page(&ulp_walk, param->bfr_len,
-					    &reqctx->dma_addr);
-		ulptx_walk_add_sg(&ulp_walk, req->src, param->sg_len,
-				  0);
+					    &reqctx->hctx_wr.dma_addr);
+		ulptx_walk_add_sg(&ulp_walk, reqctx->hctx_wr.srcsg,
+				  param->sg_len, reqctx->hctx_wr.src_ofst);
+		reqctx->hctx_wr.srcsg = ulp_walk.last_sg;
+		reqctx->hctx_wr.src_ofst = ulp_walk.last_sg_len;
 		ulptx_walk_end(&ulp_walk);
 	}
 }
@@ -2430,7 +2608,7 @@ int chcr_hash_dma_map(struct device *dev,
 			   DMA_TO_DEVICE);
 	if (!error)
 		return -ENOMEM;
-	req_ctx->is_sg_map = 1;
+	req_ctx->hctx_wr.is_sg_map = 1;
 	return 0;
 }
 
@@ -2444,7 +2622,7 @@ void chcr_hash_dma_unmap(struct device *dev,
 
 	dma_unmap_sg(dev, req->src, sg_nents(req->src),
 			   DMA_TO_DEVICE);
-	req_ctx->is_sg_map = 0;
+	req_ctx->hctx_wr.is_sg_map = 0;
 
 }
 
@@ -2636,10 +2814,10 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
 					0, dst_size);
 }
 
-int aead_ccm_validate_input(unsigned short op_type,
-			    struct aead_request *req,
-			    struct chcr_aead_ctx *aeadctx,
-			    unsigned int sub_type)
+static int aead_ccm_validate_input(unsigned short op_type,
+				   struct aead_request *req,
+				   struct chcr_aead_ctx *aeadctx,
+				   unsigned int sub_type)
 {
 	if (sub_type != CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) {
 		if (crypto_ccm_check_iv(req->iv)) {
@@ -2696,16 +2874,16 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 			CHCR_DST_SG_SIZE, req->assoclen);
 	dnents += MIN_CCM_SG; // For IV and B0
 	dst_size = get_space_for_phys_dsgl(dnents);
-	kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) * 2;
+	kctx_len = roundup(aeadctx->enckey_len, 16) * 2;
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
 	reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen +
 		       reqctx->b0_len) <= SGE_MAX_WR_LEN;
-	temp = reqctx->imm ? (DIV_ROUND_UP((assoclen + IV + req->cryptlen +
-				reqctx->b0_len), 16) * 16) :
+	temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen +
+				     reqctx->b0_len, 16) :
 		(sgl_len(reqctx->src_nents + reqctx->aad_nents +
 				    MIN_CCM_SG) *  8);
 	transhdr_len += temp;
-	transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+	transhdr_len = roundup(transhdr_len, 16);
 
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE -
 				    reqctx->b0_len, transhdr_len, op_type)) {
@@ -2727,8 +2905,8 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 
 	chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
 	memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len);
-	memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) *
-					16), aeadctx->key, aeadctx->enckey_len);
+	memcpy(chcr_req->key_ctx.key + roundup(aeadctx->enckey_len, 16),
+			aeadctx->key, aeadctx->enckey_len);
 
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size);
@@ -2798,16 +2976,15 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 				CHCR_DST_SG_SIZE, req->assoclen);
 	dnents += MIN_GCM_SG; // For IV
 	dst_size = get_space_for_phys_dsgl(dnents);
-	kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) +
-		AEAD_H_SIZE;
+	kctx_len = roundup(aeadctx->enckey_len, 16) + AEAD_H_SIZE;
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
 	reqctx->imm = (transhdr_len + assoclen + IV + req->cryptlen) <=
 			SGE_MAX_WR_LEN;
-	temp = reqctx->imm ? (DIV_ROUND_UP((assoclen + IV +
-	req->cryptlen), 16) * 16) : (sgl_len(reqctx->src_nents +
-				reqctx->aad_nents + MIN_GCM_SG) * 8);
+	temp = reqctx->imm ? roundup(assoclen + IV + req->cryptlen, 16) :
+		(sgl_len(reqctx->src_nents +
+		reqctx->aad_nents + MIN_GCM_SG) * 8);
 	transhdr_len += temp;
-	transhdr_len = DIV_ROUND_UP(transhdr_len, 16) * 16;
+	transhdr_len = roundup(transhdr_len, 16);
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE,
 			    transhdr_len, op_type)) {
 		atomic_inc(&adap->chcr_stats.fallback);
@@ -2846,8 +3023,8 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 					0, 0, dst_size);
 	chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
 	memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len);
-	memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) *
-				16), GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE);
+	memcpy(chcr_req->key_ctx.key + roundup(aeadctx->enckey_len, 16),
+	       GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE);
 
 	/* prepare a 16 byte iv */
 	/* S   A   L  T |  IV | 0x00000001 */
@@ -3067,11 +3244,10 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead,
 	unsigned char ck_size, mk_size;
 	int key_ctx_size = 0;
 
-	key_ctx_size = sizeof(struct _key_ctx) +
-		((DIV_ROUND_UP(keylen, 16)) << 4)  * 2;
+	key_ctx_size = sizeof(struct _key_ctx) + roundup(keylen, 16) * 2;
 	if (keylen == AES_KEYSIZE_128) {
-		mk_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+		mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_128;
 	} else if (keylen == AES_KEYSIZE_192) {
 		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
 		mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_192;
@@ -3178,10 +3354,9 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 
 	memcpy(aeadctx->key, key, keylen);
 	aeadctx->enckey_len = keylen;
-	key_ctx_size = sizeof(struct _key_ctx) +
-		((DIV_ROUND_UP(keylen, 16)) << 4) +
+	key_ctx_size = sizeof(struct _key_ctx) + roundup(keylen, 16) +
 		AEAD_H_SIZE;
-		aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+	aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
 						CHCR_KEYCTX_MAC_KEY_SIZE_128,
 						0, 0,
 						key_ctx_size >> 4);
@@ -3281,6 +3456,7 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 	if (IS_ERR(base_hash)) {
 		pr_err("chcr : Base driver cannot be loaded\n");
 		aeadctx->enckey_len = 0;
+		memzero_explicit(&keys, sizeof(keys));
 		return -EINVAL;
 	}
 	{
@@ -3325,17 +3501,19 @@ static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
 		chcr_change_order(actx->h_iopad, param.result_size);
 		chcr_change_order(o_ptr, param.result_size);
 		key_ctx_len = sizeof(struct _key_ctx) +
-			((DIV_ROUND_UP(keys.enckeylen, 16)) << 4) +
+			roundup(keys.enckeylen, 16) +
 			(param.result_size + align) * 2;
 		aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, param.mk_size,
 						0, 1, key_ctx_len >> 4);
 		actx->auth_mode = param.auth_mode;
 		chcr_free_shash(base_hash);
 
+		memzero_explicit(&keys, sizeof(keys));
 		return 0;
 	}
 out:
 	aeadctx->enckey_len = 0;
+	memzero_explicit(&keys, sizeof(keys));
 	if (!IS_ERR(base_hash))
 		chcr_free_shash(base_hash);
 	return -EINVAL;
@@ -3393,15 +3571,16 @@ static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
 		get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
 				aeadctx->enckey_len << 3);
 	}
-	key_ctx_len =  sizeof(struct _key_ctx)
-		+ ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4);
+	key_ctx_len =  sizeof(struct _key_ctx) + roundup(keys.enckeylen, 16);
 
 	aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, 0,
 						0, key_ctx_len >> 4);
 	actx->auth_mode = CHCR_SCMD_AUTH_MODE_NOP;
+	memzero_explicit(&keys, sizeof(keys));
 	return 0;
 out:
 	aeadctx->enckey_len = 0;
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index f263cd42a84f..dba3dff1e209 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -258,15 +258,16 @@
 #define FILL_CMD_MORE(immdatalen) htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) |\
 					ULP_TX_SC_MORE_V((immdatalen)))
 #define MAX_NK 8
-#define ROUND_16(bytes)		((bytes) & 0xFFFFFFF0)
 #define MAX_DSGL_ENT			32
 #define MIN_CIPHER_SG			1 /* IV */
 #define MIN_AUTH_SG			1 /* IV */
 #define MIN_GCM_SG			1 /* IV */
 #define MIN_DIGEST_SG			1 /*Partial Buffer*/
 #define MIN_CCM_SG			2 /*IV+B0*/
-#define SPACE_LEFT(len) \
-	((SGE_MAX_WR_LEN - WR_MIN_LEN - (len)))
+#define CIP_SPACE_LEFT(len) \
+	((SGE_MAX_WR_LEN - CIP_WR_MIN_LEN - (len)))
+#define HASH_SPACE_LEFT(len) \
+	((SGE_MAX_WR_LEN - HASH_WR_MIN_LEN - (len)))
 
 struct algo_param {
 	unsigned int auth_mode;
@@ -275,12 +276,14 @@ struct algo_param {
 };
 
 struct hash_wr_param {
+	struct algo_param alg_prm;
 	unsigned int opad_needed;
 	unsigned int more;
 	unsigned int last;
-	struct algo_param alg_prm;
+	unsigned int kctx_len;
 	unsigned int sg_len;
 	unsigned int bfr_len;
+	unsigned int hash_size;
 	u64 scmd1;
 };
 
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 77056a90c8e1..1a20424e18c6 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -54,10 +54,14 @@
 #define MAC_ERROR_BIT		0
 #define CHK_MAC_ERR_BIT(x)	(((x) >> MAC_ERROR_BIT) & 1)
 #define MAX_SALT                4
-#define WR_MIN_LEN (sizeof(struct chcr_wr) + \
+#define CIP_WR_MIN_LEN (sizeof(struct chcr_wr) + \
 		    sizeof(struct cpl_rx_phys_dsgl) + \
 		    sizeof(struct ulptx_sgl))
 
+#define HASH_WR_MIN_LEN (sizeof(struct chcr_wr) + \
+			DUMMY_BYTES + \
+		    sizeof(struct ulptx_sgl))
+
 #define padap(dev) pci_get_drvdata(dev->u_ctx->lldi.pdev)
 
 struct uld_ctx;
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 7daf0a17a7d2..c8e8972af283 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -258,21 +258,32 @@ struct chcr_context {
 	struct __crypto_ctx crypto_ctx[0];
 };
 
-struct chcr_ahash_req_ctx {
+struct chcr_hctx_per_wr {
+	struct scatterlist *srcsg;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	u32 dma_len;
+	unsigned int src_ofst;
+	unsigned int processed;
 	u32 result;
-	u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128];
-	u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128];
+	u8 is_sg_map;
+	u8 imm;
+	/*Final callback called. Driver cannot rely on nbytes to decide
+	 * final call
+	 */
+	u8 isfinal;
+};
+
+struct chcr_ahash_req_ctx {
+	struct chcr_hctx_per_wr hctx_wr;
 	u8 *reqbfr;
 	u8 *skbfr;
-	dma_addr_t dma_addr;
-	u32 dma_len;
+	/* SKB which is being sent to the hardware for processing */
+	u64 data_len;  /* Data len till time */
 	u8 reqlen;
-	u8 imm;
-	u8 is_sg_map;
 	u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE];
-	u64 data_len;  /* Data len till time */
-	/* SKB which is being sent to the hardware for processing */
-	struct sk_buff *skb;
+	u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128];
+	u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128];
 };
 
 struct chcr_blkcipher_req_ctx {
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index db1e241104ed..8e0aa3f175c9 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -360,8 +360,7 @@ inline void *copy_cpltx_pktxt(struct sk_buff *skb,
 
 	cpl = (struct cpl_tx_pkt_core *)pos;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
+	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 	ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) |
 			       TXPKT_PF_V(adap->pf);
 	if (skb_vlan_tag_present(skb)) {
@@ -475,7 +474,7 @@ inline void *chcr_crypto_wreq(struct sk_buff *skb,
 	wr->req.ulptx.len = htonl(DIV_ROUND_UP(flits, 2)  - 1);
 
 	/* Sub-command */
-	wr->req.sc_imm.cmd_more = FILL_CMD_MORE(immdatalen);
+	wr->req.sc_imm.cmd_more = FILL_CMD_MORE(!immdatalen);
 	wr->req.sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) +
 					 sizeof(wr->req.key_ctx) +
 					 kctx_len +
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 225e74a7f724..d4a81be0d7d2 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -235,7 +235,7 @@ static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 		/* Configure DMA tx control */
 		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
 		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
-		val |= EIP197_HIA_xDR_WR_RES_BUF | EIP197_HIA_xDR_WR_CTRL_BUG;
+		val |= EIP197_HIA_xDR_WR_RES_BUF | EIP197_HIA_xDR_WR_CTRL_BUF;
 		writel(val,
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DMA_CFG);
 
@@ -332,7 +332,7 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	val = EIP197_HIA_DSE_CFG_DIS_DEBUG;
 	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(7) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
 	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
-	val |= EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE;
+	val |= EIP197_HIA_DSE_CFG_ALWAYS_BUFFERABLE;
 	/* FIXME: instability issues can occur for EIP97 but disabling it impact
 	 * performances.
 	 */
@@ -354,7 +354,7 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 	val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
 	val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
 	val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
-	val |= EIP197_ALG_SHA2;
+	val |= EIP197_ALG_SHA2 | EIP197_ALG_HMAC_SHA2;
 	writel(val, EIP197_PE(priv) + EIP197_PE_EIP96_FUNCTION_EN);
 
 	/* Command Descriptor Rings prepare */
@@ -432,20 +432,18 @@ static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 }
 
 /* Called with ring's lock taken */
-static int safexcel_try_push_requests(struct safexcel_crypto_priv *priv,
-				      int ring, int reqs)
+static void safexcel_try_push_requests(struct safexcel_crypto_priv *priv,
+				       int ring)
 {
-	int coal = min_t(int, reqs, EIP197_MAX_BATCH_SZ);
+	int coal = min_t(int, priv->ring[ring].requests, EIP197_MAX_BATCH_SZ);
 
 	if (!coal)
-		return 0;
+		return;
 
 	/* Configure when we want an interrupt */
 	writel(EIP197_HIA_RDR_THRESH_PKT_MODE |
 	       EIP197_HIA_RDR_THRESH_PROC_PKT(coal),
 	       EIP197_HIA_RDR(priv, ring) + EIP197_HIA_xDR_THRESH);
-
-	return coal;
 }
 
 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
@@ -490,6 +488,15 @@ handle_req:
 		if (backlog)
 			backlog->complete(backlog, -EINPROGRESS);
 
+		/* In case the send() helper did not issue any command to push
+		 * to the engine because the input data was cached, continue to
+		 * dequeue other requests as this is valid and not an error.
+		 */
+		if (!commands && !results) {
+			kfree(request);
+			continue;
+		}
+
 		spin_lock_bh(&priv->ring[ring].egress_lock);
 		list_add_tail(&request->list, &priv->ring[ring].list);
 		spin_unlock_bh(&priv->ring[ring].egress_lock);
@@ -512,14 +519,13 @@ finalize:
 
 	spin_lock_bh(&priv->ring[ring].egress_lock);
 
+	priv->ring[ring].requests += nreq;
+
 	if (!priv->ring[ring].busy) {
-		nreq -= safexcel_try_push_requests(priv, ring, nreq);
-		if (nreq)
-			priv->ring[ring].busy = true;
+		safexcel_try_push_requests(priv, ring);
+		priv->ring[ring].busy = true;
 	}
 
-	priv->ring[ring].requests_left += nreq;
-
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
 	/* let the RDR know we have pending descriptors */
@@ -531,25 +537,6 @@ finalize:
 	       EIP197_HIA_CDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 }
 
-void safexcel_free_context(struct safexcel_crypto_priv *priv,
-			   struct crypto_async_request *req,
-			   int result_sz)
-{
-	struct safexcel_context *ctx = crypto_tfm_ctx(req->tfm);
-
-	if (ctx->result_dma)
-		dma_unmap_single(priv->dev, ctx->result_dma, result_sz,
-				 DMA_FROM_DEVICE);
-
-	if (ctx->cache) {
-		dma_unmap_single(priv->dev, ctx->cache_dma, ctx->cache_sz,
-				 DMA_TO_DEVICE);
-		kfree(ctx->cache);
-		ctx->cache = NULL;
-		ctx->cache_sz = 0;
-	}
-}
-
 void safexcel_complete(struct safexcel_crypto_priv *priv, int ring)
 {
 	struct safexcel_command_desc *cdesc;
@@ -623,7 +610,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
 {
 	struct safexcel_request *sreq;
 	struct safexcel_context *ctx;
-	int ret, i, nreq, ndesc, tot_descs, done;
+	int ret, i, nreq, ndesc, tot_descs, handled = 0;
 	bool should_complete;
 
 handle_results:
@@ -659,6 +646,7 @@ handle_results:
 
 		kfree(sreq);
 		tot_descs += ndesc;
+		handled++;
 	}
 
 acknowledge:
@@ -677,11 +665,10 @@ acknowledge:
 requests_left:
 	spin_lock_bh(&priv->ring[ring].egress_lock);
 
-	done = safexcel_try_push_requests(priv, ring,
-					  priv->ring[ring].requests_left);
+	priv->ring[ring].requests -= handled;
+	safexcel_try_push_requests(priv, ring);
 
-	priv->ring[ring].requests_left -= done;
-	if (!done && !priv->ring[ring].requests_left)
+	if (!priv->ring[ring].requests)
 		priv->ring[ring].busy = false;
 
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
@@ -781,6 +768,8 @@ static struct safexcel_alg_template *safexcel_algs[] = {
 	&safexcel_alg_sha224,
 	&safexcel_alg_sha256,
 	&safexcel_alg_hmac_sha1,
+	&safexcel_alg_hmac_sha224,
+	&safexcel_alg_hmac_sha256,
 };
 
 static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
@@ -894,29 +883,44 @@ static int safexcel_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->base);
 	}
 
-	priv->clk = of_clk_get(dev->of_node, 0);
-	if (!IS_ERR(priv->clk)) {
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	ret = PTR_ERR_OR_ZERO(priv->clk);
+	/* The clock isn't mandatory */
+	if  (ret != -ENOENT) {
+		if (ret)
+			return ret;
+
 		ret = clk_prepare_enable(priv->clk);
 		if (ret) {
 			dev_err(dev, "unable to enable clk (%d)\n", ret);
 			return ret;
 		}
-	} else {
-		/* The clock isn't mandatory */
-		if (PTR_ERR(priv->clk) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
+	}
+
+	priv->reg_clk = devm_clk_get(&pdev->dev, "reg");
+	ret = PTR_ERR_OR_ZERO(priv->reg_clk);
+	/* The clock isn't mandatory */
+	if  (ret != -ENOENT) {
+		if (ret)
+			goto err_core_clk;
+
+		ret = clk_prepare_enable(priv->reg_clk);
+		if (ret) {
+			dev_err(dev, "unable to enable reg clk (%d)\n", ret);
+			goto err_core_clk;
+		}
 	}
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (ret)
-		goto err_clk;
+		goto err_reg_clk;
 
 	priv->context_pool = dmam_pool_create("safexcel-context", dev,
 					      sizeof(struct safexcel_context_record),
 					      1, 0);
 	if (!priv->context_pool) {
 		ret = -ENOMEM;
-		goto err_clk;
+		goto err_reg_clk;
 	}
 
 	safexcel_configure(priv);
@@ -931,12 +935,12 @@ static int safexcel_probe(struct platform_device *pdev)
 						     &priv->ring[i].cdr,
 						     &priv->ring[i].rdr);
 		if (ret)
-			goto err_clk;
+			goto err_reg_clk;
 
 		ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
 		if (!ring_irq) {
 			ret = -ENOMEM;
-			goto err_clk;
+			goto err_reg_clk;
 		}
 
 		ring_irq->priv = priv;
@@ -948,7 +952,7 @@ static int safexcel_probe(struct platform_device *pdev)
 						ring_irq);
 		if (irq < 0) {
 			ret = irq;
-			goto err_clk;
+			goto err_reg_clk;
 		}
 
 		priv->ring[i].work_data.priv = priv;
@@ -959,10 +963,10 @@ static int safexcel_probe(struct platform_device *pdev)
 		priv->ring[i].workqueue = create_singlethread_workqueue(wq_name);
 		if (!priv->ring[i].workqueue) {
 			ret = -ENOMEM;
-			goto err_clk;
+			goto err_reg_clk;
 		}
 
-		priv->ring[i].requests_left = 0;
+		priv->ring[i].requests = 0;
 		priv->ring[i].busy = false;
 
 		crypto_init_queue(&priv->ring[i].queue,
@@ -980,18 +984,20 @@ static int safexcel_probe(struct platform_device *pdev)
 	ret = safexcel_hw_init(priv);
 	if (ret) {
 		dev_err(dev, "EIP h/w init failed (%d)\n", ret);
-		goto err_clk;
+		goto err_reg_clk;
 	}
 
 	ret = safexcel_register_algorithms(priv);
 	if (ret) {
 		dev_err(dev, "Failed to register algorithms (%d)\n", ret);
-		goto err_clk;
+		goto err_reg_clk;
 	}
 
 	return 0;
 
-err_clk:
+err_reg_clk:
+	clk_disable_unprepare(priv->reg_clk);
+err_core_clk:
 	clk_disable_unprepare(priv->clk);
 	return ret;
 }
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 4e219c21608b..b470a849721f 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -135,7 +135,7 @@
 
 /* EIP197_HIA_xDR_DMA_CFG */
 #define EIP197_HIA_xDR_WR_RES_BUF		BIT(22)
-#define EIP197_HIA_xDR_WR_CTRL_BUG		BIT(23)
+#define EIP197_HIA_xDR_WR_CTRL_BUF		BIT(23)
 #define EIP197_HIA_xDR_WR_OWN_BUF		BIT(24)
 #define EIP197_HIA_xDR_CFG_WR_CACHE(n)		(((n) & 0x7) << 25)
 #define EIP197_HIA_xDR_CFG_RD_CACHE(n)		(((n) & 0x7) << 29)
@@ -179,7 +179,7 @@
 #define EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(n)	((n) << 0)
 #define EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(n)	(((n) & 0x7) << 4)
 #define EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(n)	((n) << 8)
-#define EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE	GENMASK(15, 14)
+#define EIP197_HIA_DSE_CFG_ALWAYS_BUFFERABLE	GENMASK(15, 14)
 #define EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(n)	((n) << 16)
 #define EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(n)	(((n) & 0x7) << 20)
 #define EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(n)	((n) << 24)
@@ -525,6 +525,7 @@ struct safexcel_crypto_priv {
 	void __iomem *base;
 	struct device *dev;
 	struct clk *clk;
+	struct clk *reg_clk;
 	struct safexcel_config config;
 
 	enum safexcel_eip_version version;
@@ -551,10 +552,8 @@ struct safexcel_crypto_priv {
 		struct crypto_queue queue;
 		spinlock_t queue_lock;
 
-		/* Number of requests in the engine that needs the threshold
-		 * interrupt to be set up.
-		 */
-		int requests_left;
+		/* Number of requests in the engine. */
+		int requests;
 
 		/* The ring is currently handling at least one request */
 		bool busy;
@@ -580,12 +579,6 @@ struct safexcel_context {
 	int ring;
 	bool needs_inv;
 	bool exit_inv;
-
-	/* Used for ahash requests */
-	dma_addr_t result_dma;
-	void *cache;
-	dma_addr_t cache_dma;
-	unsigned int cache_sz;
 };
 
 /*
@@ -609,9 +602,6 @@ struct safexcel_inv_result {
 
 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring);
 void safexcel_complete(struct safexcel_crypto_priv *priv, int ring);
-void safexcel_free_context(struct safexcel_crypto_priv *priv,
-				  struct crypto_async_request *req,
-				  int result_sz);
 int safexcel_invalidate_cache(struct crypto_async_request *async,
 			      struct safexcel_crypto_priv *priv,
 			      dma_addr_t ctxr_dma, int ring,
@@ -643,5 +633,7 @@ extern struct safexcel_alg_template safexcel_alg_sha1;
 extern struct safexcel_alg_template safexcel_alg_sha224;
 extern struct safexcel_alg_template safexcel_alg_sha256;
 extern struct safexcel_alg_template safexcel_alg_hmac_sha1;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha224;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha256;
 
 #endif
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index 63a8768ed2ae..bafb60505fab 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -58,7 +58,8 @@ static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
 
 	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
 	token[0].packet_length = length;
-	token[0].stat = EIP197_TOKEN_STAT_LAST_PACKET;
+	token[0].stat = EIP197_TOKEN_STAT_LAST_PACKET |
+			EIP197_TOKEN_STAT_LAST_HASH;
 	token[0].instructions = EIP197_TOKEN_INS_LAST |
 				EIP197_TOKEN_INS_TYPE_CRYTO |
 				EIP197_TOKEN_INS_TYPE_OUTPUT;
@@ -456,7 +457,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 	queue_work(priv->ring[ring].workqueue,
 		   &priv->ring[ring].work_data.work);
 
-	wait_for_completion_interruptible(&result.completion);
+	wait_for_completion(&result.completion);
 
 	if (result.error) {
 		dev_warn(priv->dev,
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 122a2a58e98f..317b9e480312 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -21,10 +21,9 @@ struct safexcel_ahash_ctx {
 	struct safexcel_crypto_priv *priv;
 
 	u32 alg;
-	u32 digest;
 
-	u32 ipad[SHA1_DIGEST_SIZE / sizeof(u32)];
-	u32 opad[SHA1_DIGEST_SIZE / sizeof(u32)];
+	u32 ipad[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u32 opad[SHA256_DIGEST_SIZE / sizeof(u32)];
 };
 
 struct safexcel_ahash_req {
@@ -34,6 +33,9 @@ struct safexcel_ahash_req {
 	bool needs_inv;
 
 	int nents;
+	dma_addr_t result_dma;
+
+	u32 digest;
 
 	u8 state_sz;    /* expected sate size, only set once */
 	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
@@ -42,6 +44,9 @@ struct safexcel_ahash_req {
 	u64 processed;
 
 	u8 cache[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
+	dma_addr_t cache_dma;
+	unsigned int cache_sz;
+
 	u8 cache_next[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
 };
 
@@ -49,6 +54,8 @@ struct safexcel_ahash_export_state {
 	u64 len;
 	u64 processed;
 
+	u32 digest;
+
 	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
 	u8 cache[SHA256_BLOCK_SIZE];
 };
@@ -82,9 +89,9 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 
 	cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_OUT;
 	cdesc->control_data.control0 |= ctx->alg;
-	cdesc->control_data.control0 |= ctx->digest;
+	cdesc->control_data.control0 |= req->digest;
 
-	if (ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
+	if (req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
 		if (req->processed) {
 			if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
 				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(6);
@@ -112,12 +119,12 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 			if (req->finish)
 				ctx->base.ctxr->data[i] = cpu_to_le32(req->processed / blocksize);
 		}
-	} else if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC) {
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(10);
+	} else if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC) {
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(2 * req->state_sz / sizeof(u32));
 
-		memcpy(ctx->base.ctxr->data, ctx->ipad, digestsize);
-		memcpy(ctx->base.ctxr->data + digestsize / sizeof(u32),
-		       ctx->opad, digestsize);
+		memcpy(ctx->base.ctxr->data, ctx->ipad, req->state_sz);
+		memcpy(ctx->base.ctxr->data + req->state_sz / sizeof(u32),
+		       ctx->opad, req->state_sz);
 	}
 }
 
@@ -149,16 +156,26 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int rin
 	safexcel_complete(priv, ring);
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
-	if (sreq->finish)
-		memcpy(areq->result, sreq->state,
-		       crypto_ahash_digestsize(ahash));
-
 	if (sreq->nents) {
 		dma_unmap_sg(priv->dev, areq->src, sreq->nents, DMA_TO_DEVICE);
 		sreq->nents = 0;
 	}
 
-	safexcel_free_context(priv, async, sreq->state_sz);
+	if (sreq->result_dma) {
+		dma_unmap_single(priv->dev, sreq->result_dma, sreq->state_sz,
+				 DMA_FROM_DEVICE);
+		sreq->result_dma = 0;
+	}
+
+	if (sreq->cache_dma) {
+		dma_unmap_single(priv->dev, sreq->cache_dma, sreq->cache_sz,
+				 DMA_TO_DEVICE);
+		sreq->cache_dma = 0;
+	}
+
+	if (sreq->finish)
+		memcpy(areq->result, sreq->state,
+		       crypto_ahash_digestsize(ahash));
 
 	cache_len = sreq->len - sreq->processed;
 	if (cache_len)
@@ -184,7 +201,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 	int i, queued, len, cache_len, extra, n_cdesc = 0, ret = 0;
 
 	queued = len = req->len - req->processed;
-	if (queued < crypto_ahash_blocksize(ahash))
+	if (queued <= crypto_ahash_blocksize(ahash))
 		cache_len = queued;
 	else
 		cache_len = queued - areq->nbytes;
@@ -198,7 +215,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 			/* If this is not the last request and the queued data
 			 * is a multiple of a block, cache the last one for now.
 			 */
-			extra = queued - crypto_ahash_blocksize(ahash);
+			extra = crypto_ahash_blocksize(ahash);
 
 		if (extra) {
 			sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
@@ -220,24 +237,17 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 
 	/* Add a command descriptor for the cached data, if any */
 	if (cache_len) {
-		ctx->base.cache = kzalloc(cache_len, EIP197_GFP_FLAGS(*async));
-		if (!ctx->base.cache) {
-			ret = -ENOMEM;
-			goto unlock;
-		}
-		memcpy(ctx->base.cache, req->cache, cache_len);
-		ctx->base.cache_dma = dma_map_single(priv->dev, ctx->base.cache,
-						     cache_len, DMA_TO_DEVICE);
-		if (dma_mapping_error(priv->dev, ctx->base.cache_dma)) {
-			ret = -EINVAL;
-			goto free_cache;
+		req->cache_dma = dma_map_single(priv->dev, req->cache,
+						cache_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, req->cache_dma)) {
+			spin_unlock_bh(&priv->ring[ring].egress_lock);
+			return -EINVAL;
 		}
 
-		ctx->base.cache_sz = cache_len;
+		req->cache_sz = cache_len;
 		first_cdesc = safexcel_add_cdesc(priv, ring, 1,
 						 (cache_len == len),
-						 ctx->base.cache_dma,
-						 cache_len, len,
+						 req->cache_dma, cache_len, len,
 						 ctx->base.ctxr_dma);
 		if (IS_ERR(first_cdesc)) {
 			ret = PTR_ERR(first_cdesc);
@@ -271,7 +281,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 					   sglen, len, ctx->base.ctxr_dma);
 		if (IS_ERR(cdesc)) {
 			ret = PTR_ERR(cdesc);
-			goto cdesc_rollback;
+			goto unmap_sg;
 		}
 		n_cdesc++;
 
@@ -291,19 +301,19 @@ send_command:
 	/* Add the token */
 	safexcel_hash_token(first_cdesc, len, req->state_sz);
 
-	ctx->base.result_dma = dma_map_single(priv->dev, req->state,
-					      req->state_sz, DMA_FROM_DEVICE);
-	if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
+	req->result_dma = dma_map_single(priv->dev, req->state, req->state_sz,
+					 DMA_FROM_DEVICE);
+	if (dma_mapping_error(priv->dev, req->result_dma)) {
 		ret = -EINVAL;
-		goto cdesc_rollback;
+		goto unmap_sg;
 	}
 
 	/* Add a result descriptor */
-	rdesc = safexcel_add_rdesc(priv, ring, 1, 1, ctx->base.result_dma,
+	rdesc = safexcel_add_rdesc(priv, ring, 1, 1, req->result_dma,
 				   req->state_sz);
 	if (IS_ERR(rdesc)) {
 		ret = PTR_ERR(rdesc);
-		goto cdesc_rollback;
+		goto unmap_result;
 	}
 
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
@@ -315,20 +325,21 @@ send_command:
 	*results = 1;
 	return 0;
 
+unmap_result:
+	dma_unmap_single(priv->dev, req->result_dma, req->state_sz,
+			 DMA_FROM_DEVICE);
+unmap_sg:
+	dma_unmap_sg(priv->dev, areq->src, req->nents, DMA_TO_DEVICE);
 cdesc_rollback:
 	for (i = 0; i < n_cdesc; i++)
 		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
 unmap_cache:
-	if (ctx->base.cache_dma) {
-		dma_unmap_single(priv->dev, ctx->base.cache_dma,
-				 ctx->base.cache_sz, DMA_TO_DEVICE);
-		ctx->base.cache_sz = 0;
+	if (req->cache_dma) {
+		dma_unmap_single(priv->dev, req->cache_dma, req->cache_sz,
+				 DMA_TO_DEVICE);
+		req->cache_sz = 0;
 	}
-free_cache:
-	kfree(ctx->base.cache);
-	ctx->base.cache = NULL;
 
-unlock:
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 	return ret;
 }
@@ -493,7 +504,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	queue_work(priv->ring[ring].workqueue,
 		   &priv->ring[ring].work_data.work);
 
-	wait_for_completion_interruptible(&result.completion);
+	wait_for_completion(&result.completion);
 
 	if (result.error) {
 		dev_warn(priv->dev, "hash: completion error (%d)\n",
@@ -550,7 +561,7 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 	if (ctx->base.ctxr) {
 		if (priv->version == EIP197 &&
 		    !ctx->base.needs_inv && req->processed &&
-		    ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
+		    req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
 			/* We're still setting needs_inv here, even though it is
 			 * cleared right away, because the needs_inv flag can be
 			 * set in other functions and we want to keep the same
@@ -585,7 +596,6 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 
 static int safexcel_ahash_update(struct ahash_request *areq)
 {
-	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 
@@ -601,7 +611,7 @@ static int safexcel_ahash_update(struct ahash_request *areq)
 	 * We're not doing partial updates when performing an hmac request.
 	 * Everything will be handled by the final() call.
 	 */
-	if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
 		return 0;
 
 	if (req->hmac)
@@ -660,6 +670,8 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out)
 	export->len = req->len;
 	export->processed = req->processed;
 
+	export->digest = req->digest;
+
 	memcpy(export->state, req->state, req->state_sz);
 	memcpy(export->cache, req->cache, crypto_ahash_blocksize(ahash));
 
@@ -680,6 +692,8 @@ static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
 	req->len = export->len;
 	req->processed = export->processed;
 
+	req->digest = export->digest;
+
 	memcpy(req->cache, export->cache, crypto_ahash_blocksize(ahash));
 	memcpy(req->state, export->state, req->state_sz);
 
@@ -716,7 +730,7 @@ static int safexcel_sha1_init(struct ahash_request *areq)
 	req->state[4] = SHA1_H4;
 
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
-	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA1_DIGEST_SIZE;
 
 	return 0;
@@ -783,10 +797,10 @@ struct safexcel_alg_template safexcel_alg_sha1 = {
 
 static int safexcel_hmac_sha1_init(struct ahash_request *areq)
 {
-	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
 	safexcel_sha1_init(areq);
-	ctx->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
 	return 0;
 }
 
@@ -839,7 +853,7 @@ static int safexcel_hmac_init_pad(struct ahash_request *areq,
 		init_completion(&result.completion);
 
 		ret = crypto_ahash_digest(areq);
-		if (ret == -EINPROGRESS) {
+		if (ret == -EINPROGRESS || ret == -EBUSY) {
 			wait_for_completion_interruptible(&result.completion);
 			ret = result.error;
 		}
@@ -949,20 +963,21 @@ free_ahash:
 	return ret;
 }
 
-static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
-				     unsigned int keylen)
+static int safexcel_hmac_alg_setkey(struct crypto_ahash *tfm, const u8 *key,
+				    unsigned int keylen, const char *alg,
+				    unsigned int state_sz)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct safexcel_ahash_export_state istate, ostate;
 	int ret, i;
 
-	ret = safexcel_hmac_setkey("safexcel-sha1", key, keylen, &istate, &ostate);
+	ret = safexcel_hmac_setkey(alg, key, keylen, &istate, &ostate);
 	if (ret)
 		return ret;
 
 	if (priv->version == EIP197 && ctx->base.ctxr) {
-		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
+		for (i = 0; i < state_sz / sizeof(u32); i++) {
 			if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
 			    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
 				ctx->base.needs_inv = true;
@@ -971,12 +986,19 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
 		}
 	}
 
-	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
-	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+	memcpy(ctx->ipad, &istate.state, state_sz);
+	memcpy(ctx->opad, &ostate.state, state_sz);
 
 	return 0;
 }
 
+static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	return safexcel_hmac_alg_setkey(tfm, key, keylen, "safexcel-sha1",
+					SHA1_DIGEST_SIZE);
+}
+
 struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
 	.alg.ahash = {
@@ -1024,7 +1046,7 @@ static int safexcel_sha256_init(struct ahash_request *areq)
 	req->state[7] = SHA256_H7;
 
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
-	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
 
 	return 0;
@@ -1086,7 +1108,7 @@ static int safexcel_sha224_init(struct ahash_request *areq)
 	req->state[7] = SHA224_H7;
 
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
-	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
 
 	return 0;
@@ -1130,3 +1152,115 @@ struct safexcel_alg_template safexcel_alg_sha224 = {
 		},
 	},
 };
+
+static int safexcel_hmac_sha224_setkey(struct crypto_ahash *tfm, const u8 *key,
+				       unsigned int keylen)
+{
+	return safexcel_hmac_alg_setkey(tfm, key, keylen, "safexcel-sha224",
+					SHA256_DIGEST_SIZE);
+}
+
+static int safexcel_hmac_sha224_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	safexcel_sha224_init(areq);
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	return 0;
+}
+
+static int safexcel_hmac_sha224_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_hmac_sha224_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha224 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha224_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_hmac_sha224_digest,
+		.setkey = safexcel_hmac_sha224_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha224)",
+				.cra_driver_name = "safexcel-hmac-sha224",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	return safexcel_hmac_alg_setkey(tfm, key, keylen, "safexcel-sha256",
+					SHA256_DIGEST_SIZE);
+}
+
+static int safexcel_hmac_sha256_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	safexcel_sha256_init(areq);
+	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	return 0;
+}
+
+static int safexcel_hmac_sha256_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_hmac_sha256_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha256 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha256_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_hmac_sha256_digest,
+		.setkey = safexcel_hmac_sha256_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha256)",
+				.cra_driver_name = "safexcel-hmac-sha256",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 717a26607bdb..27f7dad2d45d 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -1167,9 +1167,11 @@ static int aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	ctx->authkey_len = keys.authkeylen;
 	ctx->enckey_len = keys.enckeylen;
 
+	memzero_explicit(&keys, sizeof(keys));
 	return aead_setup(tfm, crypto_aead_authsize(tfm));
 badkey:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index aca2373fa1de..f81fa4a3e66b 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -25,7 +25,6 @@
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/dma-direct.h> /* XXX: drivers shall never use this directly! */
 #include <linux/clk.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 764be3e6933c..a10c418d4e5c 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -759,6 +759,16 @@ static int dcp_sha_digest(struct ahash_request *req)
 	return dcp_sha_finup(req);
 }
 
+static int dcp_sha_noimport(struct ahash_request *req, const void *in)
+{
+	return -ENOSYS;
+}
+
+static int dcp_sha_noexport(struct ahash_request *req, void *out)
+{
+	return -ENOSYS;
+}
+
 static int dcp_sha_cra_init(struct crypto_tfm *tfm)
 {
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
@@ -829,6 +839,8 @@ static struct ahash_alg dcp_sha1_alg = {
 	.final	= dcp_sha_final,
 	.finup	= dcp_sha_finup,
 	.digest	= dcp_sha_digest,
+	.import = dcp_sha_noimport,
+	.export = dcp_sha_noexport,
 	.halg	= {
 		.digestsize	= SHA1_DIGEST_SIZE,
 		.base		= {
@@ -853,6 +865,8 @@ static struct ahash_alg dcp_sha256_alg = {
 	.final	= dcp_sha_final,
 	.finup	= dcp_sha_finup,
 	.digest	= dcp_sha_digest,
+	.import = dcp_sha_noimport,
+	.export = dcp_sha_noexport,
 	.halg	= {
 		.digestsize	= SHA256_DIGEST_SIZE,
 		.base		= {
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 662e709812cc..80e9c842aad4 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -359,6 +359,16 @@ static int n2_hash_async_finup(struct ahash_request *req)
 	return crypto_ahash_finup(&rctx->fallback_req);
 }
 
+static int n2_hash_async_noimport(struct ahash_request *req, const void *in)
+{
+	return -ENOSYS;
+}
+
+static int n2_hash_async_noexport(struct ahash_request *req, void *out)
+{
+	return -ENOSYS;
+}
+
 static int n2_hash_cra_init(struct crypto_tfm *tfm)
 {
 	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
@@ -1467,6 +1477,8 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
 	ahash->final = n2_hash_async_final;
 	ahash->finup = n2_hash_async_finup;
 	ahash->digest = n2_hash_async_digest;
+	ahash->export = n2_hash_async_noexport;
+	ahash->import = n2_hash_async_noimport;
 
 	halg = &ahash->halg;
 	halg->digestsize = tmpl->digest_size;
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index bf52cd1d7fca..66869976cfa2 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -1105,10 +1105,9 @@ static int __init nx842_pseries_init(void)
 
 	RCU_INIT_POINTER(devdata, NULL);
 	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
-	if (!new_devdata) {
-		pr_err("Could not allocate memory for device data\n");
+	if (!new_devdata)
 		return -ENOMEM;
-	}
+
 	RCU_INIT_POINTER(devdata, new_devdata);
 
 	ret = vio_register_driver(&nx842_vio_driver);
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index fbec0a2e76dd..9019f6b67986 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -47,6 +47,8 @@
 static LIST_HEAD(dev_list);
 static DEFINE_SPINLOCK(list_lock);
 
+static int aes_fallback_sz = 200;
+
 #ifdef DEBUG
 #define omap_aes_read(dd, offset)				\
 ({								\
@@ -388,7 +390,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
 	pr_debug("err: %d\n", err);
 
-	crypto_finalize_cipher_request(dd->engine, req, err);
+	crypto_finalize_ablkcipher_request(dd->engine, req, err);
 
 	pm_runtime_mark_last_busy(dd->dev);
 	pm_runtime_put_autosuspend(dd->dev);
@@ -408,14 +410,15 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 				 struct ablkcipher_request *req)
 {
 	if (req)
-		return crypto_transfer_cipher_request_to_engine(dd->engine, req);
+		return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
 
 	return 0;
 }
 
 static int omap_aes_prepare_req(struct crypto_engine *engine,
-				struct ablkcipher_request *req)
+				void *areq)
 {
+	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
@@ -468,8 +471,9 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 }
 
 static int omap_aes_crypt_req(struct crypto_engine *engine,
-			      struct ablkcipher_request *req)
+			      void *areq)
 {
+	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
 	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
 	struct omap_aes_dev *dd = rctx->dd;
 
@@ -517,7 +521,7 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 		  !!(mode & FLAGS_ENCRYPT),
 		  !!(mode & FLAGS_CBC));
 
-	if (req->nbytes < 200) {
+	if (req->nbytes < aes_fallback_sz) {
 		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
 
 		skcipher_request_set_tfm(subreq, ctx->fallback);
@@ -601,6 +605,11 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
 	return omap_aes_crypt(req, FLAGS_CTR);
 }
 
+static int omap_aes_prepare_req(struct crypto_engine *engine,
+				void *req);
+static int omap_aes_crypt_req(struct crypto_engine *engine,
+			      void *req);
+
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
 	const char *name = crypto_tfm_alg_name(tfm);
@@ -616,6 +625,10 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 
+	ctx->enginectx.op.prepare_request = omap_aes_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+	ctx->enginectx.op.do_one_request = omap_aes_crypt_req;
+
 	return 0;
 }
 
@@ -1029,6 +1042,87 @@ err:
 	return err;
 }
 
+static ssize_t fallback_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	return sprintf(buf, "%d\n", aes_fallback_sz);
+}
+
+static ssize_t fallback_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	ssize_t status;
+	long value;
+
+	status = kstrtol(buf, 0, &value);
+	if (status)
+		return status;
+
+	/* HW accelerator only works with buffers > 9 */
+	if (value < 9) {
+		dev_err(dev, "minimum fallback size 9\n");
+		return -EINVAL;
+	}
+
+	aes_fallback_sz = value;
+
+	return size;
+}
+
+static ssize_t queue_len_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct omap_aes_dev *dd = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", dd->engine->queue.max_qlen);
+}
+
+static ssize_t queue_len_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t size)
+{
+	struct omap_aes_dev *dd;
+	ssize_t status;
+	long value;
+	unsigned long flags;
+
+	status = kstrtol(buf, 0, &value);
+	if (status)
+		return status;
+
+	if (value < 1)
+		return -EINVAL;
+
+	/*
+	 * Changing the queue size in fly is safe, if size becomes smaller
+	 * than current size, it will just not accept new entries until
+	 * it has shrank enough.
+	 */
+	spin_lock_bh(&list_lock);
+	list_for_each_entry(dd, &dev_list, list) {
+		spin_lock_irqsave(&dd->lock, flags);
+		dd->engine->queue.max_qlen = value;
+		dd->aead_queue.base.max_qlen = value;
+		spin_unlock_irqrestore(&dd->lock, flags);
+	}
+	spin_unlock_bh(&list_lock);
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(queue_len);
+static DEVICE_ATTR_RW(fallback);
+
+static struct attribute *omap_aes_attrs[] = {
+	&dev_attr_queue_len.attr,
+	&dev_attr_fallback.attr,
+	NULL,
+};
+
+static struct attribute_group omap_aes_attr_group = {
+	.attrs = omap_aes_attrs,
+};
+
 static int omap_aes_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1119,8 +1213,6 @@ static int omap_aes_probe(struct platform_device *pdev)
 		goto err_engine;
 	}
 
-	dd->engine->prepare_cipher_request = omap_aes_prepare_req;
-	dd->engine->cipher_one_request = omap_aes_crypt_req;
 	err = crypto_engine_start(dd->engine);
 	if (err)
 		goto err_engine;
@@ -1159,6 +1251,12 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 	}
 
+	err = sysfs_create_group(&dev->kobj, &omap_aes_attr_group);
+	if (err) {
+		dev_err(dev, "could not create sysfs device attrs\n");
+		goto err_aead_algs;
+	}
+
 	return 0;
 err_aead_algs:
 	for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
diff --git a/drivers/crypto/omap-aes.h b/drivers/crypto/omap-aes.h
index 8906342e2b9a..fc3b46a85809 100644
--- a/drivers/crypto/omap-aes.h
+++ b/drivers/crypto/omap-aes.h
@@ -13,6 +13,8 @@
 #ifndef __OMAP_AES_H__
 #define __OMAP_AES_H__
 
+#include <crypto/engine.h>
+
 #define DST_MAXBURST			4
 #define DMA_MIN				(DST_MAXBURST * sizeof(u32))
 
@@ -95,6 +97,7 @@ struct omap_aes_gcm_result {
 };
 
 struct omap_aes_ctx {
+	struct crypto_engine_ctx enginectx;
 	int		keylen;
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	u8		nonce[4];
diff --git a/drivers/crypto/omap-crypto.c b/drivers/crypto/omap-crypto.c
index 23e37779317e..2c42e4b4a6e9 100644
--- a/drivers/crypto/omap-crypto.c
+++ b/drivers/crypto/omap-crypto.c
@@ -104,6 +104,10 @@ static int omap_crypto_check_sg(struct scatterlist *sg, int total, int bs,
 			return OMAP_CRYPTO_NOT_ALIGNED;
 		if (!IS_ALIGNED(sg->length, bs))
 			return OMAP_CRYPTO_NOT_ALIGNED;
+#ifdef CONFIG_ZONE_DMA
+		if (page_zonenum(sg_page(sg)) != ZONE_DMA)
+			return OMAP_CRYPTO_NOT_ALIGNED;
+#endif
 
 		len += sg->length;
 		sg = sg_next(sg);
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index ebc5c0f11f03..eb95b0d7f184 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -86,6 +86,7 @@
 #define FLAGS_OUT_DATA_ST_SHIFT	10
 
 struct omap_des_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct omap_des_dev *dd;
 
 	int		keylen;
@@ -498,7 +499,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
 
 	pr_debug("err: %d\n", err);
 
-	crypto_finalize_cipher_request(dd->engine, req, err);
+	crypto_finalize_ablkcipher_request(dd->engine, req, err);
 
 	pm_runtime_mark_last_busy(dd->dev);
 	pm_runtime_put_autosuspend(dd->dev);
@@ -520,14 +521,15 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
 				 struct ablkcipher_request *req)
 {
 	if (req)
-		return crypto_transfer_cipher_request_to_engine(dd->engine, req);
+		return crypto_transfer_ablkcipher_request_to_engine(dd->engine, req);
 
 	return 0;
 }
 
 static int omap_des_prepare_req(struct crypto_engine *engine,
-				struct ablkcipher_request *req)
+				void *areq)
 {
+	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
 	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 	struct omap_des_dev *dd = omap_des_find_dev(ctx);
@@ -582,8 +584,9 @@ static int omap_des_prepare_req(struct crypto_engine *engine,
 }
 
 static int omap_des_crypt_req(struct crypto_engine *engine,
-			      struct ablkcipher_request *req)
+			      void *areq)
 {
+	struct ablkcipher_request *req = container_of(areq, struct ablkcipher_request, base);
 	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 	struct omap_des_dev *dd = omap_des_find_dev(ctx);
@@ -695,12 +698,23 @@ static int omap_des_cbc_decrypt(struct ablkcipher_request *req)
 	return omap_des_crypt(req, FLAGS_CBC);
 }
 
+static int omap_des_prepare_req(struct crypto_engine *engine,
+				void *areq);
+static int omap_des_crypt_req(struct crypto_engine *engine,
+			      void *areq);
+
 static int omap_des_cra_init(struct crypto_tfm *tfm)
 {
+	struct omap_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
 	pr_debug("enter\n");
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_des_reqctx);
 
+	ctx->enginectx.op.prepare_request = omap_des_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+	ctx->enginectx.op.do_one_request = omap_des_crypt_req;
+
 	return 0;
 }
 
@@ -1046,8 +1060,6 @@ static int omap_des_probe(struct platform_device *pdev)
 		goto err_engine;
 	}
 
-	dd->engine->prepare_cipher_request = omap_des_prepare_req;
-	dd->engine->cipher_one_request = omap_des_crypt_req;
 	err = crypto_engine_start(dd->engine);
 	if (err)
 		goto err_engine;
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 86b89ace836f..ad02aa63b519 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -229,6 +229,7 @@ struct omap_sham_dev {
 	u8			xmit_buf[BUFLEN] OMAP_ALIGNED;
 
 	unsigned long		flags;
+	int			fallback_sz;
 	struct crypto_queue	queue;
 	struct ahash_request	*req;
 
@@ -759,6 +760,13 @@ static int omap_sham_align_sgs(struct scatterlist *sg,
 	while (nbytes > 0 && sg_tmp) {
 		n++;
 
+#ifdef CONFIG_ZONE_DMA
+		if (page_zonenum(sg_page(sg_tmp)) != ZONE_DMA) {
+			aligned = false;
+			break;
+		}
+#endif
+
 		if (offset < sg_tmp->length) {
 			if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) {
 				aligned = false;
@@ -809,9 +817,6 @@ static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 	bool final = rctx->flags & BIT(FLAGS_FINUP);
 	int xmit_len, hash_later;
 
-	if (!req)
-		return 0;
-
 	bs = get_block_size(rctx);
 
 	if (update)
@@ -1002,7 +1007,7 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
 		 ctx->total, ctx->digcnt, (ctx->flags & BIT(FLAGS_FINUP)) != 0);
 
 	if (ctx->total < get_block_size(ctx) ||
-	    ctx->total < OMAP_SHA_DMA_THRESHOLD)
+	    ctx->total < dd->fallback_sz)
 		ctx->flags |= BIT(FLAGS_CPU);
 
 	if (ctx->flags & BIT(FLAGS_CPU))
@@ -1258,11 +1263,11 @@ static int omap_sham_final(struct ahash_request *req)
 	/*
 	 * OMAP HW accel works only with buffers >= 9.
 	 * HMAC is always >= 9 because ipad == block size.
-	 * If buffersize is less than DMA_THRESHOLD, we use fallback
+	 * If buffersize is less than fallback_sz, we use fallback
 	 * SW encoding, as using DMA + HW in this case doesn't provide
 	 * any benefit.
 	 */
-	if (!ctx->digcnt && ctx->bufcnt < OMAP_SHA_DMA_THRESHOLD)
+	if (!ctx->digcnt && ctx->bufcnt < ctx->dd->fallback_sz)
 		return omap_sham_final_shash(req);
 	else if (ctx->bufcnt)
 		return omap_sham_enqueue(req, OP_FINAL);
@@ -1761,7 +1766,7 @@ static void omap_sham_done_task(unsigned long data)
 		if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) {
 			/* hash or semi-hash ready */
 			clear_bit(FLAGS_DMA_READY, &dd->flags);
-				goto finish;
+			goto finish;
 		}
 	}
 
@@ -2013,6 +2018,85 @@ err:
 	return err;
 }
 
+static ssize_t fallback_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct omap_sham_dev *dd = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", dd->fallback_sz);
+}
+
+static ssize_t fallback_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t size)
+{
+	struct omap_sham_dev *dd = dev_get_drvdata(dev);
+	ssize_t status;
+	long value;
+
+	status = kstrtol(buf, 0, &value);
+	if (status)
+		return status;
+
+	/* HW accelerator only works with buffers > 9 */
+	if (value < 9) {
+		dev_err(dev, "minimum fallback size 9\n");
+		return -EINVAL;
+	}
+
+	dd->fallback_sz = value;
+
+	return size;
+}
+
+static ssize_t queue_len_show(struct device *dev, struct device_attribute *attr,
+			      char *buf)
+{
+	struct omap_sham_dev *dd = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", dd->queue.max_qlen);
+}
+
+static ssize_t queue_len_store(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t size)
+{
+	struct omap_sham_dev *dd = dev_get_drvdata(dev);
+	ssize_t status;
+	long value;
+	unsigned long flags;
+
+	status = kstrtol(buf, 0, &value);
+	if (status)
+		return status;
+
+	if (value < 1)
+		return -EINVAL;
+
+	/*
+	 * Changing the queue size in fly is safe, if size becomes smaller
+	 * than current size, it will just not accept new entries until
+	 * it has shrank enough.
+	 */
+	spin_lock_irqsave(&dd->lock, flags);
+	dd->queue.max_qlen = value;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	return size;
+}
+
+static DEVICE_ATTR_RW(queue_len);
+static DEVICE_ATTR_RW(fallback);
+
+static struct attribute *omap_sham_attrs[] = {
+	&dev_attr_queue_len.attr,
+	&dev_attr_fallback.attr,
+	NULL,
+};
+
+static struct attribute_group omap_sham_attr_group = {
+	.attrs = omap_sham_attrs,
+};
+
 static int omap_sham_probe(struct platform_device *pdev)
 {
 	struct omap_sham_dev *dd;
@@ -2074,6 +2158,8 @@ static int omap_sham_probe(struct platform_device *pdev)
 	pm_runtime_use_autosuspend(dev);
 	pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
 
+	dd->fallback_sz = OMAP_SHA_DMA_THRESHOLD;
+
 	pm_runtime_enable(dev);
 	pm_runtime_irq_safe(dev);
 
@@ -2111,6 +2197,12 @@ static int omap_sham_probe(struct platform_device *pdev)
 		}
 	}
 
+	err = sysfs_create_group(&dev->kobj, &omap_sham_attr_group);
+	if (err) {
+		dev_err(dev, "could not create sysfs device attrs\n");
+		goto err_algs;
+	}
+
 	return 0;
 
 err_algs:
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 4ef52c9d72fc..a4df966adbf6 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -499,10 +499,12 @@ static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	memcpy(ctx->hash_ctx, keys.authkey, keys.authkeylen);
 	ctx->hash_key_len = keys.authkeylen;
 
+	memzero_explicit(&keys, sizeof(keys));
 	return 0;
 
 badkey:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index baffae817259..1138e41d6805 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -546,11 +546,14 @@ static int qat_alg_aead_init_sessions(struct crypto_aead *tfm, const u8 *key,
 	if (qat_alg_aead_init_dec_session(tfm, alg, &keys, mode))
 		goto error;
 
+	memzero_explicit(&keys, sizeof(keys));
 	return 0;
 bad_key:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 error:
+	memzero_explicit(&keys, sizeof(keys));
 	return -EFAULT;
 }
 
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index 13c52d6bf630..320e7854b4ee 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -969,7 +969,8 @@ unmap_src:
 	return ret;
 }
 
-int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen)
+static int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value,
+			 size_t vlen)
 {
 	struct qat_crypto_instance *inst = ctx->inst;
 	struct device *dev = &GET_DEV(inst->accel_dev);
@@ -1000,7 +1001,8 @@ err:
 	return ret;
 }
 
-int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen)
+static int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value,
+			 size_t vlen)
 {
 	struct qat_crypto_instance *inst = ctx->inst;
 	struct device *dev = &GET_DEV(inst->accel_dev);
@@ -1024,7 +1026,8 @@ int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen)
 	return 0;
 }
 
-int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen)
+static int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value,
+			 size_t vlen)
 {
 	struct qat_crypto_instance *inst = ctx->inst;
 	struct device *dev = &GET_DEV(inst->accel_dev);
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 5d64c08b7f47..bf7163042569 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -404,29 +404,31 @@ static const struct of_device_id s5p_sss_dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, s5p_sss_dt_match);
 
-static inline struct samsung_aes_variant *find_s5p_sss_version
-				   (struct platform_device *pdev)
+static inline const struct samsung_aes_variant *find_s5p_sss_version
+				   (const struct platform_device *pdev)
 {
 	if (IS_ENABLED(CONFIG_OF) && (pdev->dev.of_node)) {
 		const struct of_device_id *match;
 
 		match = of_match_node(s5p_sss_dt_match,
 					pdev->dev.of_node);
-		return (struct samsung_aes_variant *)match->data;
+		return (const struct samsung_aes_variant *)match->data;
 	}
-	return (struct samsung_aes_variant *)
+	return (const struct samsung_aes_variant *)
 			platform_get_device_id(pdev)->driver_data;
 }
 
 static struct s5p_aes_dev *s5p_dev;
 
-static void s5p_set_dma_indata(struct s5p_aes_dev *dev, struct scatterlist *sg)
+static void s5p_set_dma_indata(struct s5p_aes_dev *dev,
+			       const struct scatterlist *sg)
 {
 	SSS_WRITE(dev, FCBRDMAS, sg_dma_address(sg));
 	SSS_WRITE(dev, FCBRDMAL, sg_dma_len(sg));
 }
 
-static void s5p_set_dma_outdata(struct s5p_aes_dev *dev, struct scatterlist *sg)
+static void s5p_set_dma_outdata(struct s5p_aes_dev *dev,
+				const struct scatterlist *sg)
 {
 	SSS_WRITE(dev, FCBTDMAS, sg_dma_address(sg));
 	SSS_WRITE(dev, FCBTDMAL, sg_dma_len(sg));
@@ -619,7 +621,7 @@ static inline void s5p_hash_write(struct s5p_aes_dev *dd,
  * @sg:		scatterlist ready to DMA transmit
  */
 static void s5p_set_dma_hashdata(struct s5p_aes_dev *dev,
-				 struct scatterlist *sg)
+				 const struct scatterlist *sg)
 {
 	dev->hash_sg_cnt--;
 	SSS_WRITE(dev, FCHRDMAS, sg_dma_address(sg));
@@ -792,9 +794,9 @@ static void s5p_hash_read_msg(struct ahash_request *req)
  * @ctx:	request context
  */
 static void s5p_hash_write_ctx_iv(struct s5p_aes_dev *dd,
-				  struct s5p_hash_reqctx *ctx)
+				  const struct s5p_hash_reqctx *ctx)
 {
-	u32 *hash = (u32 *)ctx->digest;
+	const u32 *hash = (const u32 *)ctx->digest;
 	unsigned int i;
 
 	for (i = 0; i < ctx->nregs; i++)
@@ -818,7 +820,7 @@ static void s5p_hash_write_iv(struct ahash_request *req)
  */
 static void s5p_hash_copy_result(struct ahash_request *req)
 {
-	struct s5p_hash_reqctx *ctx = ahash_request_ctx(req);
+	const struct s5p_hash_reqctx *ctx = ahash_request_ctx(req);
 
 	if (!req->result)
 		return;
@@ -1210,9 +1212,6 @@ static int s5p_hash_prepare_request(struct ahash_request *req, bool update)
 	int xmit_len, hash_later, nbytes;
 	int ret;
 
-	if (!req)
-		return 0;
-
 	if (update)
 		nbytes = req->nbytes;
 	else
@@ -1293,7 +1292,7 @@ static int s5p_hash_prepare_request(struct ahash_request *req, bool update)
  */
 static void s5p_hash_update_dma_stop(struct s5p_aes_dev *dd)
 {
-	struct s5p_hash_reqctx *ctx = ahash_request_ctx(dd->hash_req);
+	const struct s5p_hash_reqctx *ctx = ahash_request_ctx(dd->hash_req);
 
 	dma_unmap_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE);
 	clear_bit(HASH_FLAGS_DMA_ACTIVE, &dd->hash_flags);
@@ -1720,7 +1719,7 @@ static void s5p_hash_cra_exit(struct crypto_tfm *tfm)
  */
 static int s5p_hash_export(struct ahash_request *req, void *out)
 {
-	struct s5p_hash_reqctx *ctx = ahash_request_ctx(req);
+	const struct s5p_hash_reqctx *ctx = ahash_request_ctx(req);
 
 	memcpy(out, ctx, sizeof(*ctx) + ctx->bufcnt);
 
@@ -1834,7 +1833,8 @@ static struct ahash_alg algs_sha1_md5_sha256[] = {
 };
 
 static void s5p_set_aes(struct s5p_aes_dev *dev,
-			uint8_t *key, uint8_t *iv, unsigned int keylen)
+			const uint8_t *key, const uint8_t *iv,
+			unsigned int keylen)
 {
 	void __iomem *keystart;
 
@@ -2153,7 +2153,7 @@ static int s5p_aes_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	int i, j, err = -ENODEV;
-	struct samsung_aes_variant *variant;
+	const struct samsung_aes_variant *variant;
 	struct s5p_aes_dev *pdata;
 	struct resource *res;
 	unsigned int hash_i;
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 08e7bdcaa6e3..0f2245e1af2b 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1397,11 +1397,9 @@ static int sahara_probe(struct platform_device *pdev)
 	int err;
 	int i;
 
-	dev = devm_kzalloc(&pdev->dev, sizeof(struct sahara_dev), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&pdev->dev, "unable to alloc data struct.\n");
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
 		return -ENOMEM;
-	}
 
 	dev->device = &pdev->dev;
 	platform_set_drvdata(pdev, dev);
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 4a06a7a665ee..c5d3efc54a4f 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -17,6 +17,7 @@
 #include <crypto/des.h>
 #include <crypto/engine.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
 
 #define DRIVER_NAME             "stm32-cryp"
 
@@ -29,8 +30,12 @@
 #define FLG_ECB                 BIT(4)
 #define FLG_CBC                 BIT(5)
 #define FLG_CTR                 BIT(6)
+#define FLG_GCM                 BIT(7)
+#define FLG_CCM                 BIT(8)
 /* Mode mask = bits [15..0] */
 #define FLG_MODE_MASK           GENMASK(15, 0)
+/* Bit [31..16] status  */
+#define FLG_CCM_PADDED_WA       BIT(16)
 
 /* Registers */
 #define CRYP_CR                 0x00000000
@@ -53,6 +58,8 @@
 #define CRYP_IV0RR              0x00000044
 #define CRYP_IV1LR              0x00000048
 #define CRYP_IV1RR              0x0000004C
+#define CRYP_CSGCMCCM0R         0x00000050
+#define CRYP_CSGCM0R            0x00000070
 
 /* Registers values */
 #define CR_DEC_NOT_ENC          0x00000004
@@ -64,6 +71,8 @@
 #define CR_AES_CBC              0x00000028
 #define CR_AES_CTR              0x00000030
 #define CR_AES_KP               0x00000038
+#define CR_AES_GCM              0x00080000
+#define CR_AES_CCM              0x00080008
 #define CR_AES_UNKNOWN          0xFFFFFFFF
 #define CR_ALGO_MASK            0x00080038
 #define CR_DATA32               0x00000000
@@ -75,6 +84,12 @@
 #define CR_KEY256               0x00000200
 #define CR_FFLUSH               0x00004000
 #define CR_CRYPEN               0x00008000
+#define CR_PH_INIT              0x00000000
+#define CR_PH_HEADER            0x00010000
+#define CR_PH_PAYLOAD           0x00020000
+#define CR_PH_FINAL             0x00030000
+#define CR_PH_MASK              0x00030000
+#define CR_NBPBL_SHIFT          20
 
 #define SR_BUSY                 0x00000010
 #define SR_OFNE                 0x00000004
@@ -87,10 +102,17 @@
 
 /* Misc */
 #define AES_BLOCK_32            (AES_BLOCK_SIZE / sizeof(u32))
+#define GCM_CTR_INIT            2
 #define _walked_in              (cryp->in_walk.offset - cryp->in_sg->offset)
 #define _walked_out             (cryp->out_walk.offset - cryp->out_sg->offset)
 
+struct stm32_cryp_caps {
+	bool                    swap_final;
+	bool                    padding_wa;
+};
+
 struct stm32_cryp_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct stm32_cryp       *cryp;
 	int                     keylen;
 	u32                     key[AES_KEYSIZE_256 / sizeof(u32)];
@@ -108,13 +130,16 @@ struct stm32_cryp {
 	struct clk              *clk;
 	unsigned long           flags;
 	u32                     irq_status;
+	const struct stm32_cryp_caps *caps;
 	struct stm32_cryp_ctx   *ctx;
 
 	struct crypto_engine    *engine;
 
-	struct mutex            lock; /* protects req */
+	struct mutex            lock; /* protects req / areq */
 	struct ablkcipher_request *req;
+	struct aead_request     *areq;
 
+	size_t                  authsize;
 	size_t                  hw_blocksize;
 
 	size_t                  total_in;
@@ -137,6 +162,7 @@ struct stm32_cryp {
 	struct scatter_walk     out_walk;
 
 	u32                     last_ctr[4];
+	u32                     gcm_ctr;
 };
 
 struct stm32_cryp_list {
@@ -179,6 +205,16 @@ static inline bool is_ctr(struct stm32_cryp *cryp)
 	return cryp->flags & FLG_CTR;
 }
 
+static inline bool is_gcm(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_GCM;
+}
+
+static inline bool is_ccm(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_CCM;
+}
+
 static inline bool is_encrypt(struct stm32_cryp *cryp)
 {
 	return cryp->flags & FLG_ENCRYPT;
@@ -207,6 +243,24 @@ static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp)
 			!(status & SR_BUSY), 10, 100000);
 }
 
+static inline int stm32_cryp_wait_enable(struct stm32_cryp *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->regs + CRYP_CR, status,
+			!(status & CR_CRYPEN), 10, 100000);
+}
+
+static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status,
+			status & SR_OFNE, 10, 100000);
+}
+
+static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp);
+
 static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx)
 {
 	struct stm32_cryp *tmp, *cryp = NULL;
@@ -365,6 +419,12 @@ static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp)
 	if (is_aes(cryp) && is_ctr(cryp))
 		return CR_AES_CTR;
 
+	if (is_aes(cryp) && is_gcm(cryp))
+		return CR_AES_GCM;
+
+	if (is_aes(cryp) && is_ccm(cryp))
+		return CR_AES_CCM;
+
 	if (is_des(cryp) && is_ecb(cryp))
 		return CR_DES_ECB;
 
@@ -381,6 +441,79 @@ static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp)
 	return CR_AES_UNKNOWN;
 }
 
+static unsigned int stm32_cryp_get_input_text_len(struct stm32_cryp *cryp)
+{
+	return is_encrypt(cryp) ? cryp->areq->cryptlen :
+				  cryp->areq->cryptlen - cryp->authsize;
+}
+
+static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg)
+{
+	int ret;
+	u32 iv[4];
+
+	/* Phase 1 : init */
+	memcpy(iv, cryp->areq->iv, 12);
+	iv[3] = cpu_to_be32(GCM_CTR_INIT);
+	cryp->gcm_ctr = GCM_CTR_INIT;
+	stm32_cryp_hw_write_iv(cryp, iv);
+
+	stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN);
+
+	/* Wait for end of processing */
+	ret = stm32_cryp_wait_enable(cryp);
+	if (ret)
+		dev_err(cryp->dev, "Timeout (gcm init)\n");
+
+	return ret;
+}
+
+static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
+{
+	int ret;
+	u8 iv[AES_BLOCK_SIZE], b0[AES_BLOCK_SIZE];
+	u32 *d;
+	unsigned int i, textlen;
+
+	/* Phase 1 : init. Firstly set the CTR value to 1 (not 0) */
+	memcpy(iv, cryp->areq->iv, AES_BLOCK_SIZE);
+	memset(iv + AES_BLOCK_SIZE - 1 - iv[0], 0, iv[0] + 1);
+	iv[AES_BLOCK_SIZE - 1] = 1;
+	stm32_cryp_hw_write_iv(cryp, (u32 *)iv);
+
+	/* Build B0 */
+	memcpy(b0, iv, AES_BLOCK_SIZE);
+
+	b0[0] |= (8 * ((cryp->authsize - 2) / 2));
+
+	if (cryp->areq->assoclen)
+		b0[0] |= 0x40;
+
+	textlen = stm32_cryp_get_input_text_len(cryp);
+
+	b0[AES_BLOCK_SIZE - 2] = textlen >> 8;
+	b0[AES_BLOCK_SIZE - 1] = textlen & 0xFF;
+
+	/* Enable HW */
+	stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN);
+
+	/* Write B0 */
+	d = (u32 *)b0;
+
+	for (i = 0; i < AES_BLOCK_32; i++) {
+		if (!cryp->caps->padding_wa)
+			*d = cpu_to_be32(*d);
+		stm32_cryp_write(cryp, CRYP_DIN, *d++);
+	}
+
+	/* Wait for end of processing */
+	ret = stm32_cryp_wait_enable(cryp);
+	if (ret)
+		dev_err(cryp->dev, "Timeout (ccm init)\n");
+
+	return ret;
+}
+
 static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 {
 	int ret;
@@ -436,6 +569,29 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 	stm32_cryp_write(cryp, CRYP_CR, cfg);
 
 	switch (hw_mode) {
+	case CR_AES_GCM:
+	case CR_AES_CCM:
+		/* Phase 1 : init */
+		if (hw_mode == CR_AES_CCM)
+			ret = stm32_cryp_ccm_init(cryp, cfg);
+		else
+			ret = stm32_cryp_gcm_init(cryp, cfg);
+
+		if (ret)
+			return ret;
+
+		/* Phase 2 : header (authenticated data) */
+		if (cryp->areq->assoclen) {
+			cfg |= CR_PH_HEADER;
+		} else if (stm32_cryp_get_input_text_len(cryp)) {
+			cfg |= CR_PH_PAYLOAD;
+			stm32_cryp_write(cryp, CRYP_CR, cfg);
+		} else {
+			cfg |= CR_PH_INIT;
+		}
+
+		break;
+
 	case CR_DES_CBC:
 	case CR_TDES_CBC:
 	case CR_AES_CBC:
@@ -452,12 +608,16 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 
 	stm32_cryp_write(cryp, CRYP_CR, cfg);
 
+	cryp->flags &= ~FLG_CCM_PADDED_WA;
+
 	return 0;
 }
 
-static void stm32_cryp_finish_req(struct stm32_cryp *cryp)
+static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err)
 {
-	int err = 0;
+	if (!err && (is_gcm(cryp) || is_ccm(cryp)))
+		/* Phase 4 : output tag */
+		err = stm32_cryp_read_auth_tag(cryp);
 
 	if (cryp->sgs_copied) {
 		void *buf_in, *buf_out;
@@ -478,8 +638,14 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp)
 		free_pages((unsigned long)buf_out, pages);
 	}
 
-	crypto_finalize_cipher_request(cryp->engine, cryp->req, err);
-	cryp->req = NULL;
+	if (is_gcm(cryp) || is_ccm(cryp)) {
+		crypto_finalize_aead_request(cryp->engine, cryp->areq, err);
+		cryp->areq = NULL;
+	} else {
+		crypto_finalize_ablkcipher_request(cryp->engine, cryp->req,
+						   err);
+		cryp->req = NULL;
+	}
 
 	memset(cryp->ctx->key, 0, cryp->ctx->keylen);
 
@@ -494,10 +660,36 @@ static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
 	return 0;
 }
 
+static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq);
+static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
+					 void *areq);
+
 static int stm32_cryp_cra_init(struct crypto_tfm *tfm)
 {
+	struct stm32_cryp_ctx *ctx = crypto_tfm_ctx(tfm);
+
 	tfm->crt_ablkcipher.reqsize = sizeof(struct stm32_cryp_reqctx);
 
+	ctx->enginectx.op.do_one_request = stm32_cryp_cipher_one_req;
+	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_cipher_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+	return 0;
+}
+
+static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq);
+static int stm32_cryp_prepare_aead_req(struct crypto_engine *engine,
+				       void *areq);
+
+static int stm32_cryp_aes_aead_init(struct crypto_aead *tfm)
+{
+	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct stm32_cryp_reqctx);
+
+	ctx->enginectx.op.do_one_request = stm32_cryp_aead_one_req;
+	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_aead_req;
+	ctx->enginectx.op.unprepare_request = NULL;
+
 	return 0;
 }
 
@@ -513,7 +705,21 @@ static int stm32_cryp_crypt(struct ablkcipher_request *req, unsigned long mode)
 
 	rctx->mode = mode;
 
-	return crypto_transfer_cipher_request_to_engine(cryp->engine, req);
+	return crypto_transfer_ablkcipher_request_to_engine(cryp->engine, req);
+}
+
+static int stm32_cryp_aead_crypt(struct aead_request *req, unsigned long mode)
+{
+	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct stm32_cryp_reqctx *rctx = aead_request_ctx(req);
+	struct stm32_cryp *cryp = stm32_cryp_find_dev(ctx);
+
+	if (!cryp)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return crypto_transfer_aead_request_to_engine(cryp->engine, req);
 }
 
 static int stm32_cryp_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
@@ -555,6 +761,46 @@ static int stm32_cryp_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 		return stm32_cryp_setkey(tfm, key, keylen);
 }
 
+static int stm32_cryp_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+				      unsigned int keylen)
+{
+	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int stm32_cryp_aes_gcm_setauthsize(struct crypto_aead *tfm,
+					  unsigned int authsize)
+{
+	return authsize == AES_BLOCK_SIZE ? 0 : -EINVAL;
+}
+
+static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm,
+					  unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 6:
+	case 8:
+	case 10:
+	case 12:
+	case 14:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int stm32_cryp_aes_ecb_encrypt(struct ablkcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
@@ -585,6 +831,26 @@ static int stm32_cryp_aes_ctr_decrypt(struct ablkcipher_request *req)
 	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR);
 }
 
+static int stm32_cryp_aes_gcm_encrypt(struct aead_request *req)
+{
+	return stm32_cryp_aead_crypt(req, FLG_AES | FLG_GCM | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_gcm_decrypt(struct aead_request *req)
+{
+	return stm32_cryp_aead_crypt(req, FLG_AES | FLG_GCM);
+}
+
+static int stm32_cryp_aes_ccm_encrypt(struct aead_request *req)
+{
+	return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_ccm_decrypt(struct aead_request *req)
+{
+	return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM);
+}
+
 static int stm32_cryp_des_ecb_encrypt(struct ablkcipher_request *req)
 {
 	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT);
@@ -625,18 +891,19 @@ static int stm32_cryp_tdes_cbc_decrypt(struct ablkcipher_request *req)
 	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC);
 }
 
-static int stm32_cryp_prepare_req(struct crypto_engine *engine,
-				  struct ablkcipher_request *req)
+static int stm32_cryp_prepare_req(struct ablkcipher_request *req,
+				  struct aead_request *areq)
 {
 	struct stm32_cryp_ctx *ctx;
 	struct stm32_cryp *cryp;
 	struct stm32_cryp_reqctx *rctx;
 	int ret;
 
-	if (!req)
+	if (!req && !areq)
 		return -EINVAL;
 
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	ctx = req ? crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)) :
+		    crypto_aead_ctx(crypto_aead_reqtfm(areq));
 
 	cryp = ctx->cryp;
 
@@ -645,7 +912,7 @@ static int stm32_cryp_prepare_req(struct crypto_engine *engine,
 
 	mutex_lock(&cryp->lock);
 
-	rctx = ablkcipher_request_ctx(req);
+	rctx = req ? ablkcipher_request_ctx(req) : aead_request_ctx(areq);
 	rctx->mode &= FLG_MODE_MASK;
 
 	ctx->cryp = cryp;
@@ -654,15 +921,48 @@ static int stm32_cryp_prepare_req(struct crypto_engine *engine,
 	cryp->hw_blocksize = is_aes(cryp) ? AES_BLOCK_SIZE : DES_BLOCK_SIZE;
 	cryp->ctx = ctx;
 
-	cryp->req = req;
-	cryp->total_in = req->nbytes;
-	cryp->total_out = cryp->total_in;
+	if (req) {
+		cryp->req = req;
+		cryp->total_in = req->nbytes;
+		cryp->total_out = cryp->total_in;
+	} else {
+		/*
+		 * Length of input and output data:
+		 * Encryption case:
+		 *  INPUT  =   AssocData  ||   PlainText
+		 *          <- assoclen ->  <- cryptlen ->
+		 *          <------- total_in ----------->
+		 *
+		 *  OUTPUT =   AssocData  ||  CipherText  ||   AuthTag
+		 *          <- assoclen ->  <- cryptlen ->  <- authsize ->
+		 *          <---------------- total_out ----------------->
+		 *
+		 * Decryption case:
+		 *  INPUT  =   AssocData  ||  CipherText  ||  AuthTag
+		 *          <- assoclen ->  <--------- cryptlen --------->
+		 *                                          <- authsize ->
+		 *          <---------------- total_in ------------------>
+		 *
+		 *  OUTPUT =   AssocData  ||   PlainText
+		 *          <- assoclen ->  <- crypten - authsize ->
+		 *          <---------- total_out ----------------->
+		 */
+		cryp->areq = areq;
+		cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq));
+		cryp->total_in = areq->assoclen + areq->cryptlen;
+		if (is_encrypt(cryp))
+			/* Append auth tag to output */
+			cryp->total_out = cryp->total_in + cryp->authsize;
+		else
+			/* No auth tag in output */
+			cryp->total_out = cryp->total_in - cryp->authsize;
+	}
 
 	cryp->total_in_save = cryp->total_in;
 	cryp->total_out_save = cryp->total_out;
 
-	cryp->in_sg = req->src;
-	cryp->out_sg = req->dst;
+	cryp->in_sg = req ? req->src : areq->src;
+	cryp->out_sg = req ? req->dst : areq->dst;
 	cryp->out_sg_save = cryp->out_sg;
 
 	cryp->in_sg_len = sg_nents_for_len(cryp->in_sg, cryp->total_in);
@@ -686,6 +986,12 @@ static int stm32_cryp_prepare_req(struct crypto_engine *engine,
 	scatterwalk_start(&cryp->in_walk, cryp->in_sg);
 	scatterwalk_start(&cryp->out_walk, cryp->out_sg);
 
+	if (is_gcm(cryp) || is_ccm(cryp)) {
+		/* In output, jump after assoc data */
+		scatterwalk_advance(&cryp->out_walk, cryp->areq->assoclen);
+		cryp->total_out -= cryp->areq->assoclen;
+	}
+
 	ret = stm32_cryp_hw_init(cryp);
 out:
 	if (ret)
@@ -695,14 +1001,20 @@ out:
 }
 
 static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
-					 struct ablkcipher_request *req)
+					 void *areq)
 {
-	return stm32_cryp_prepare_req(engine, req);
+	struct ablkcipher_request *req = container_of(areq,
+						      struct ablkcipher_request,
+						      base);
+
+	return stm32_cryp_prepare_req(req, NULL);
 }
 
-static int stm32_cryp_cipher_one_req(struct crypto_engine *engine,
-				     struct ablkcipher_request *req)
+static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq)
 {
+	struct ablkcipher_request *req = container_of(areq,
+						      struct ablkcipher_request,
+						      base);
 	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 	struct stm32_cryp *cryp = ctx->cryp;
@@ -713,6 +1025,34 @@ static int stm32_cryp_cipher_one_req(struct crypto_engine *engine,
 	return stm32_cryp_cpu_start(cryp);
 }
 
+static int stm32_cryp_prepare_aead_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+
+	return stm32_cryp_prepare_req(NULL, req);
+}
+
+static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request,
+						base);
+	struct stm32_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct stm32_cryp *cryp = ctx->cryp;
+
+	if (!cryp)
+		return -ENODEV;
+
+	if (unlikely(!cryp->areq->assoclen &&
+		     !stm32_cryp_get_input_text_len(cryp))) {
+		/* No input data to process: get tag and finish */
+		stm32_cryp_finish_req(cryp, 0);
+		return 0;
+	}
+
+	return stm32_cryp_cpu_start(cryp);
+}
+
 static u32 *stm32_cryp_next_out(struct stm32_cryp *cryp, u32 *dst,
 				unsigned int n)
 {
@@ -745,6 +1085,111 @@ static u32 *stm32_cryp_next_in(struct stm32_cryp *cryp, u32 *src,
 	return (u32 *)((u8 *)src + n);
 }
 
+static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
+{
+	u32 cfg, size_bit, *dst, d32;
+	u8 *d8;
+	unsigned int i, j;
+	int ret = 0;
+
+	/* Update Config */
+	cfg = stm32_cryp_read(cryp, CRYP_CR);
+
+	cfg &= ~CR_PH_MASK;
+	cfg |= CR_PH_FINAL;
+	cfg &= ~CR_DEC_NOT_ENC;
+	cfg |= CR_CRYPEN;
+
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	if (is_gcm(cryp)) {
+		/* GCM: write aad and payload size (in bits) */
+		size_bit = cryp->areq->assoclen * 8;
+		if (cryp->caps->swap_final)
+			size_bit = cpu_to_be32(size_bit);
+
+		stm32_cryp_write(cryp, CRYP_DIN, 0);
+		stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+
+		size_bit = is_encrypt(cryp) ? cryp->areq->cryptlen :
+				cryp->areq->cryptlen - AES_BLOCK_SIZE;
+		size_bit *= 8;
+		if (cryp->caps->swap_final)
+			size_bit = cpu_to_be32(size_bit);
+
+		stm32_cryp_write(cryp, CRYP_DIN, 0);
+		stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+	} else {
+		/* CCM: write CTR0 */
+		u8 iv[AES_BLOCK_SIZE];
+		u32 *iv32 = (u32 *)iv;
+
+		memcpy(iv, cryp->areq->iv, AES_BLOCK_SIZE);
+		memset(iv + AES_BLOCK_SIZE - 1 - iv[0], 0, iv[0] + 1);
+
+		for (i = 0; i < AES_BLOCK_32; i++) {
+			if (!cryp->caps->padding_wa)
+				*iv32 = cpu_to_be32(*iv32);
+			stm32_cryp_write(cryp, CRYP_DIN, *iv32++);
+		}
+	}
+
+	/* Wait for output data */
+	ret = stm32_cryp_wait_output(cryp);
+	if (ret) {
+		dev_err(cryp->dev, "Timeout (read tag)\n");
+		return ret;
+	}
+
+	if (is_encrypt(cryp)) {
+		/* Get and write tag */
+		dst = sg_virt(cryp->out_sg) + _walked_out;
+
+		for (i = 0; i < AES_BLOCK_32; i++) {
+			if (cryp->total_out >= sizeof(u32)) {
+				/* Read a full u32 */
+				*dst = stm32_cryp_read(cryp, CRYP_DOUT);
+
+				dst = stm32_cryp_next_out(cryp, dst,
+							  sizeof(u32));
+				cryp->total_out -= sizeof(u32);
+			} else if (!cryp->total_out) {
+				/* Empty fifo out (data from input padding) */
+				stm32_cryp_read(cryp, CRYP_DOUT);
+			} else {
+				/* Read less than an u32 */
+				d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+				d8 = (u8 *)&d32;
+
+				for (j = 0; j < cryp->total_out; j++) {
+					*((u8 *)dst) = *(d8++);
+					dst = stm32_cryp_next_out(cryp, dst, 1);
+				}
+				cryp->total_out = 0;
+			}
+		}
+	} else {
+		/* Get and check tag */
+		u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32];
+
+		scatterwalk_map_and_copy(in_tag, cryp->in_sg,
+					 cryp->total_in_save - cryp->authsize,
+					 cryp->authsize, 0);
+
+		for (i = 0; i < AES_BLOCK_32; i++)
+			out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+
+		if (crypto_memneq(in_tag, out_tag, cryp->authsize))
+			ret = -EBADMSG;
+	}
+
+	/* Disable cryp */
+	cfg &= ~CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	return ret;
+}
+
 static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp)
 {
 	u32 cr;
@@ -777,17 +1222,24 @@ static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
 	unsigned int i, j;
 	u32 d32, *dst;
 	u8 *d8;
+	size_t tag_size;
+
+	/* Do no read tag now (if any) */
+	if (is_encrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp)))
+		tag_size = cryp->authsize;
+	else
+		tag_size = 0;
 
 	dst = sg_virt(cryp->out_sg) + _walked_out;
 
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
-		if (likely(cryp->total_out >= sizeof(u32))) {
+		if (likely(cryp->total_out - tag_size >= sizeof(u32))) {
 			/* Read a full u32 */
 			*dst = stm32_cryp_read(cryp, CRYP_DOUT);
 
 			dst = stm32_cryp_next_out(cryp, dst, sizeof(u32));
 			cryp->total_out -= sizeof(u32);
-		} else if (!cryp->total_out) {
+		} else if (cryp->total_out == tag_size) {
 			/* Empty fifo out (data from input padding) */
 			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
 		} else {
@@ -795,15 +1247,15 @@ static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
 			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
 			d8 = (u8 *)&d32;
 
-			for (j = 0; j < cryp->total_out; j++) {
+			for (j = 0; j < cryp->total_out - tag_size; j++) {
 				*((u8 *)dst) = *(d8++);
 				dst = stm32_cryp_next_out(cryp, dst, 1);
 			}
-			cryp->total_out = 0;
+			cryp->total_out = tag_size;
 		}
 	}
 
-	return !cryp->total_out || !cryp->total_in;
+	return !(cryp->total_out - tag_size) || !cryp->total_in;
 }
 
 static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
@@ -811,33 +1263,219 @@ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
 	unsigned int i, j;
 	u32 *src;
 	u8 d8[4];
+	size_t tag_size;
+
+	/* Do no write tag (if any) */
+	if (is_decrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp)))
+		tag_size = cryp->authsize;
+	else
+		tag_size = 0;
 
 	src = sg_virt(cryp->in_sg) + _walked_in;
 
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
-		if (likely(cryp->total_in >= sizeof(u32))) {
+		if (likely(cryp->total_in - tag_size >= sizeof(u32))) {
 			/* Write a full u32 */
 			stm32_cryp_write(cryp, CRYP_DIN, *src);
 
 			src = stm32_cryp_next_in(cryp, src, sizeof(u32));
 			cryp->total_in -= sizeof(u32);
-		} else if (!cryp->total_in) {
+		} else if (cryp->total_in == tag_size) {
 			/* Write padding data */
 			stm32_cryp_write(cryp, CRYP_DIN, 0);
 		} else {
 			/* Write less than an u32 */
 			memset(d8, 0, sizeof(u32));
-			for (j = 0; j < cryp->total_in; j++) {
+			for (j = 0; j < cryp->total_in - tag_size; j++) {
 				d8[j] = *((u8 *)src);
 				src = stm32_cryp_next_in(cryp, src, 1);
 			}
 
 			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
-			cryp->total_in = 0;
+			cryp->total_in = tag_size;
 		}
 	}
 }
 
+static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
+{
+	int err;
+	u32 cfg, tmp[AES_BLOCK_32];
+	size_t total_in_ori = cryp->total_in;
+	struct scatterlist *out_sg_ori = cryp->out_sg;
+	unsigned int i;
+
+	/* 'Special workaround' procedure described in the datasheet */
+
+	/* a) disable ip */
+	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg &= ~CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* b) Update IV1R */
+	stm32_cryp_write(cryp, CRYP_IV1RR, cryp->gcm_ctr - 2);
+
+	/* c) change mode to CTR */
+	cfg &= ~CR_ALGO_MASK;
+	cfg |= CR_AES_CTR;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* a) enable IP */
+	cfg |= CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* b) pad and write the last block */
+	stm32_cryp_irq_write_block(cryp);
+	cryp->total_in = total_in_ori;
+	err = stm32_cryp_wait_output(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Timeout (write gcm header)\n");
+		return stm32_cryp_finish_req(cryp, err);
+	}
+
+	/* c) get and store encrypted data */
+	stm32_cryp_irq_read_data(cryp);
+	scatterwalk_map_and_copy(tmp, out_sg_ori,
+				 cryp->total_in_save - total_in_ori,
+				 total_in_ori, 0);
+
+	/* d) change mode back to AES GCM */
+	cfg &= ~CR_ALGO_MASK;
+	cfg |= CR_AES_GCM;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* e) change phase to Final */
+	cfg &= ~CR_PH_MASK;
+	cfg |= CR_PH_FINAL;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* f) write padded data */
+	for (i = 0; i < AES_BLOCK_32; i++) {
+		if (cryp->total_in)
+			stm32_cryp_write(cryp, CRYP_DIN, tmp[i]);
+		else
+			stm32_cryp_write(cryp, CRYP_DIN, 0);
+
+		cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in);
+	}
+
+	/* g) Empty fifo out */
+	err = stm32_cryp_wait_output(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Timeout (write gcm header)\n");
+		return stm32_cryp_finish_req(cryp, err);
+	}
+
+	for (i = 0; i < AES_BLOCK_32; i++)
+		stm32_cryp_read(cryp, CRYP_DOUT);
+
+	/* h) run the he normal Final phase */
+	stm32_cryp_finish_req(cryp, 0);
+}
+
+static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp)
+{
+	u32 cfg, payload_bytes;
+
+	/* disable ip, set NPBLB and reneable ip */
+	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg &= ~CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	payload_bytes = is_decrypt(cryp) ? cryp->total_in - cryp->authsize :
+					   cryp->total_in;
+	cfg |= (cryp->hw_blocksize - payload_bytes) << CR_NBPBL_SHIFT;
+	cfg |= CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+}
+
+static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
+{
+	int err = 0;
+	u32 cfg, iv1tmp;
+	u32 cstmp1[AES_BLOCK_32], cstmp2[AES_BLOCK_32], tmp[AES_BLOCK_32];
+	size_t last_total_out, total_in_ori = cryp->total_in;
+	struct scatterlist *out_sg_ori = cryp->out_sg;
+	unsigned int i;
+
+	/* 'Special workaround' procedure described in the datasheet */
+	cryp->flags |= FLG_CCM_PADDED_WA;
+
+	/* a) disable ip */
+	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+
+	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg &= ~CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* b) get IV1 from CRYP_CSGCMCCM7 */
+	iv1tmp = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + 7 * 4);
+
+	/* c) Load CRYP_CSGCMCCMxR */
+	for (i = 0; i < ARRAY_SIZE(cstmp1); i++)
+		cstmp1[i] = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + i * 4);
+
+	/* d) Write IV1R */
+	stm32_cryp_write(cryp, CRYP_IV1RR, iv1tmp);
+
+	/* e) change mode to CTR */
+	cfg &= ~CR_ALGO_MASK;
+	cfg |= CR_AES_CTR;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* a) enable IP */
+	cfg |= CR_CRYPEN;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* b) pad and write the last block */
+	stm32_cryp_irq_write_block(cryp);
+	cryp->total_in = total_in_ori;
+	err = stm32_cryp_wait_output(cryp);
+	if (err) {
+		dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+		return stm32_cryp_finish_req(cryp, err);
+	}
+
+	/* c) get and store decrypted data */
+	last_total_out = cryp->total_out;
+	stm32_cryp_irq_read_data(cryp);
+
+	memset(tmp, 0, sizeof(tmp));
+	scatterwalk_map_and_copy(tmp, out_sg_ori,
+				 cryp->total_out_save - last_total_out,
+				 last_total_out, 0);
+
+	/* d) Load again CRYP_CSGCMCCMxR */
+	for (i = 0; i < ARRAY_SIZE(cstmp2); i++)
+		cstmp2[i] = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + i * 4);
+
+	/* e) change mode back to AES CCM */
+	cfg &= ~CR_ALGO_MASK;
+	cfg |= CR_AES_CCM;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* f) change phase to header */
+	cfg &= ~CR_PH_MASK;
+	cfg |= CR_PH_HEADER;
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	/* g) XOR and write padded data */
+	for (i = 0; i < ARRAY_SIZE(tmp); i++) {
+		tmp[i] ^= cstmp1[i];
+		tmp[i] ^= cstmp2[i];
+		stm32_cryp_write(cryp, CRYP_DIN, tmp[i]);
+	}
+
+	/* h) wait for completion */
+	err = stm32_cryp_wait_busy(cryp);
+	if (err)
+		dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+
+	/* i) run the he normal Final phase */
+	stm32_cryp_finish_req(cryp, err);
+}
+
 static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
 {
 	if (unlikely(!cryp->total_in)) {
@@ -845,28 +1483,220 @@ static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
 		return;
 	}
 
+	if (unlikely(cryp->total_in < AES_BLOCK_SIZE &&
+		     (stm32_cryp_get_hw_mode(cryp) == CR_AES_GCM) &&
+		     is_encrypt(cryp))) {
+		/* Padding for AES GCM encryption */
+		if (cryp->caps->padding_wa)
+			/* Special case 1 */
+			return stm32_cryp_irq_write_gcm_padded_data(cryp);
+
+		/* Setting padding bytes (NBBLB) */
+		stm32_cryp_irq_set_npblb(cryp);
+	}
+
+	if (unlikely((cryp->total_in - cryp->authsize < AES_BLOCK_SIZE) &&
+		     (stm32_cryp_get_hw_mode(cryp) == CR_AES_CCM) &&
+		     is_decrypt(cryp))) {
+		/* Padding for AES CCM decryption */
+		if (cryp->caps->padding_wa)
+			/* Special case 2 */
+			return stm32_cryp_irq_write_ccm_padded_data(cryp);
+
+		/* Setting padding bytes (NBBLB) */
+		stm32_cryp_irq_set_npblb(cryp);
+	}
+
 	if (is_aes(cryp) && is_ctr(cryp))
 		stm32_cryp_check_ctr_counter(cryp);
 
 	stm32_cryp_irq_write_block(cryp);
 }
 
+static void stm32_cryp_irq_write_gcm_header(struct stm32_cryp *cryp)
+{
+	int err;
+	unsigned int i, j;
+	u32 cfg, *src;
+
+	src = sg_virt(cryp->in_sg) + _walked_in;
+
+	for (i = 0; i < AES_BLOCK_32; i++) {
+		stm32_cryp_write(cryp, CRYP_DIN, *src);
+
+		src = stm32_cryp_next_in(cryp, src, sizeof(u32));
+		cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in);
+
+		/* Check if whole header written */
+		if ((cryp->total_in_save - cryp->total_in) ==
+				cryp->areq->assoclen) {
+			/* Write padding if needed */
+			for (j = i + 1; j < AES_BLOCK_32; j++)
+				stm32_cryp_write(cryp, CRYP_DIN, 0);
+
+			/* Wait for completion */
+			err = stm32_cryp_wait_busy(cryp);
+			if (err) {
+				dev_err(cryp->dev, "Timeout (gcm header)\n");
+				return stm32_cryp_finish_req(cryp, err);
+			}
+
+			if (stm32_cryp_get_input_text_len(cryp)) {
+				/* Phase 3 : payload */
+				cfg = stm32_cryp_read(cryp, CRYP_CR);
+				cfg &= ~CR_CRYPEN;
+				stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+				cfg &= ~CR_PH_MASK;
+				cfg |= CR_PH_PAYLOAD;
+				cfg |= CR_CRYPEN;
+				stm32_cryp_write(cryp, CRYP_CR, cfg);
+			} else {
+				/* Phase 4 : tag */
+				stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+				stm32_cryp_finish_req(cryp, 0);
+			}
+
+			break;
+		}
+
+		if (!cryp->total_in)
+			break;
+	}
+}
+
+static void stm32_cryp_irq_write_ccm_header(struct stm32_cryp *cryp)
+{
+	int err;
+	unsigned int i = 0, j, k;
+	u32 alen, cfg, *src;
+	u8 d8[4];
+
+	src = sg_virt(cryp->in_sg) + _walked_in;
+	alen = cryp->areq->assoclen;
+
+	if (!_walked_in) {
+		if (cryp->areq->assoclen <= 65280) {
+			/* Write first u32 of B1 */
+			d8[0] = (alen >> 8) & 0xFF;
+			d8[1] = alen & 0xFF;
+			d8[2] = *((u8 *)src);
+			src = stm32_cryp_next_in(cryp, src, 1);
+			d8[3] = *((u8 *)src);
+			src = stm32_cryp_next_in(cryp, src, 1);
+
+			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+			i++;
+
+			cryp->total_in -= min_t(size_t, 2, cryp->total_in);
+		} else {
+			/* Build the two first u32 of B1 */
+			d8[0] = 0xFF;
+			d8[1] = 0xFE;
+			d8[2] = alen & 0xFF000000;
+			d8[3] = alen & 0x00FF0000;
+
+			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+			i++;
+
+			d8[0] = alen & 0x0000FF00;
+			d8[1] = alen & 0x000000FF;
+			d8[2] = *((u8 *)src);
+			src = stm32_cryp_next_in(cryp, src, 1);
+			d8[3] = *((u8 *)src);
+			src = stm32_cryp_next_in(cryp, src, 1);
+
+			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+			i++;
+
+			cryp->total_in -= min_t(size_t, 2, cryp->total_in);
+		}
+	}
+
+	/* Write next u32 */
+	for (; i < AES_BLOCK_32; i++) {
+		/* Build an u32 */
+		memset(d8, 0, sizeof(u32));
+		for (k = 0; k < sizeof(u32); k++) {
+			d8[k] = *((u8 *)src);
+			src = stm32_cryp_next_in(cryp, src, 1);
+
+			cryp->total_in -= min_t(size_t, 1, cryp->total_in);
+			if ((cryp->total_in_save - cryp->total_in) == alen)
+				break;
+		}
+
+		stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+
+		if ((cryp->total_in_save - cryp->total_in) == alen) {
+			/* Write padding if needed */
+			for (j = i + 1; j < AES_BLOCK_32; j++)
+				stm32_cryp_write(cryp, CRYP_DIN, 0);
+
+			/* Wait for completion */
+			err = stm32_cryp_wait_busy(cryp);
+			if (err) {
+				dev_err(cryp->dev, "Timeout (ccm header)\n");
+				return stm32_cryp_finish_req(cryp, err);
+			}
+
+			if (stm32_cryp_get_input_text_len(cryp)) {
+				/* Phase 3 : payload */
+				cfg = stm32_cryp_read(cryp, CRYP_CR);
+				cfg &= ~CR_CRYPEN;
+				stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+				cfg &= ~CR_PH_MASK;
+				cfg |= CR_PH_PAYLOAD;
+				cfg |= CR_CRYPEN;
+				stm32_cryp_write(cryp, CRYP_CR, cfg);
+			} else {
+				/* Phase 4 : tag */
+				stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+				stm32_cryp_finish_req(cryp, 0);
+			}
+
+			break;
+		}
+	}
+}
+
 static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
 {
 	struct stm32_cryp *cryp = arg;
+	u32 ph;
 
 	if (cryp->irq_status & MISR_OUT)
 		/* Output FIFO IRQ: read data */
 		if (unlikely(stm32_cryp_irq_read_data(cryp))) {
 			/* All bytes processed, finish */
 			stm32_cryp_write(cryp, CRYP_IMSCR, 0);
-			stm32_cryp_finish_req(cryp);
+			stm32_cryp_finish_req(cryp, 0);
 			return IRQ_HANDLED;
 		}
 
 	if (cryp->irq_status & MISR_IN) {
-		/* Input FIFO IRQ: write data */
-		stm32_cryp_irq_write_data(cryp);
+		if (is_gcm(cryp)) {
+			ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK;
+			if (unlikely(ph == CR_PH_HEADER))
+				/* Write Header */
+				stm32_cryp_irq_write_gcm_header(cryp);
+			else
+				/* Input FIFO IRQ: write data */
+				stm32_cryp_irq_write_data(cryp);
+			cryp->gcm_ctr++;
+		} else if (is_ccm(cryp)) {
+			ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK;
+			if (unlikely(ph == CR_PH_HEADER))
+				/* Write Header */
+				stm32_cryp_irq_write_ccm_header(cryp);
+			else
+				/* Input FIFO IRQ: write data */
+				stm32_cryp_irq_write_data(cryp);
+		} else {
+			/* Input FIFO IRQ: write data */
+			stm32_cryp_irq_write_data(cryp);
+		}
 	}
 
 	return IRQ_HANDLED;
@@ -1028,8 +1858,62 @@ static struct crypto_alg crypto_algs[] = {
 },
 };
 
+static struct aead_alg aead_algs[] = {
+{
+	.setkey		= stm32_cryp_aes_aead_setkey,
+	.setauthsize	= stm32_cryp_aes_gcm_setauthsize,
+	.encrypt	= stm32_cryp_aes_gcm_encrypt,
+	.decrypt	= stm32_cryp_aes_gcm_decrypt,
+	.init		= stm32_cryp_aes_aead_init,
+	.ivsize		= 12,
+	.maxauthsize	= AES_BLOCK_SIZE,
+
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "stm32-gcm-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+},
+{
+	.setkey		= stm32_cryp_aes_aead_setkey,
+	.setauthsize	= stm32_cryp_aes_ccm_setauthsize,
+	.encrypt	= stm32_cryp_aes_ccm_encrypt,
+	.decrypt	= stm32_cryp_aes_ccm_decrypt,
+	.init		= stm32_cryp_aes_aead_init,
+	.ivsize		= AES_BLOCK_SIZE,
+	.maxauthsize	= AES_BLOCK_SIZE,
+
+	.base = {
+		.cra_name		= "ccm(aes)",
+		.cra_driver_name	= "stm32-ccm-aes",
+		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+},
+};
+
+static const struct stm32_cryp_caps f7_data = {
+	.swap_final = true,
+	.padding_wa = true,
+};
+
+static const struct stm32_cryp_caps mp1_data = {
+	.swap_final = false,
+	.padding_wa = false,
+};
+
 static const struct of_device_id stm32_dt_ids[] = {
-	{ .compatible = "st,stm32f756-cryp", },
+	{ .compatible = "st,stm32f756-cryp", .data = &f7_data},
+	{ .compatible = "st,stm32mp1-cryp", .data = &mp1_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, stm32_dt_ids);
@@ -1046,6 +1930,10 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 	if (!cryp)
 		return -ENOMEM;
 
+	cryp->caps = of_device_get_match_data(dev);
+	if (!cryp->caps)
+		return -ENODEV;
+
 	cryp->dev = dev;
 
 	mutex_init(&cryp->lock);
@@ -1102,9 +1990,6 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 		goto err_engine1;
 	}
 
-	cryp->engine->prepare_cipher_request = stm32_cryp_prepare_cipher_req;
-	cryp->engine->cipher_one_request = stm32_cryp_cipher_one_req;
-
 	ret = crypto_engine_start(cryp->engine);
 	if (ret) {
 		dev_err(dev, "Could not start crypto engine\n");
@@ -1117,10 +2002,16 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 		goto err_algs;
 	}
 
+	ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	if (ret)
+		goto err_aead_algs;
+
 	dev_info(dev, "Initialized\n");
 
 	return 0;
 
+err_aead_algs:
+	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
 err_algs:
 err_engine2:
 	crypto_engine_exit(cryp->engine);
@@ -1141,6 +2032,7 @@ static int stm32_cryp_remove(struct platform_device *pdev)
 	if (!cryp)
 		return -ENODEV;
 
+	crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
 	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
 
 	crypto_engine_exit(cryp->engine);
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 4ca4a264a833..981e45692695 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -122,6 +122,7 @@ enum stm32_hash_data_format {
 #define HASH_DMA_THRESHOLD		50
 
 struct stm32_hash_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct stm32_hash_dev	*hdev;
 	unsigned long		flags;
 
@@ -626,7 +627,7 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
 			writesl(hdev->io_base + HASH_DIN, buffer,
 				DIV_ROUND_UP(ncp, sizeof(u32)));
 		}
-		stm32_hash_set_nblw(hdev, DIV_ROUND_UP(ncp, sizeof(u32)));
+		stm32_hash_set_nblw(hdev, ncp);
 		reg = stm32_hash_read(hdev, HASH_STR);
 		reg |= HASH_STR_DCAL;
 		stm32_hash_write(hdev, HASH_STR, reg);
@@ -743,13 +744,15 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev)
 	struct ahash_request *req = hdev->req;
 	struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
 	int err;
+	int buflen = rctx->bufcnt;
+
+	rctx->bufcnt = 0;
 
 	if (!(rctx->flags & HASH_FLAGS_CPU))
 		err = stm32_hash_dma_send(hdev);
 	else
-		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, rctx->bufcnt, 1);
+		err = stm32_hash_xmit_cpu(hdev, rctx->buffer, buflen, 1);
 
-	rctx->bufcnt = 0;
 
 	return err;
 }
@@ -828,15 +831,19 @@ static int stm32_hash_hw_init(struct stm32_hash_dev *hdev,
 	return 0;
 }
 
+static int stm32_hash_one_request(struct crypto_engine *engine, void *areq);
+static int stm32_hash_prepare_req(struct crypto_engine *engine, void *areq);
+
 static int stm32_hash_handle_queue(struct stm32_hash_dev *hdev,
 				   struct ahash_request *req)
 {
 	return crypto_transfer_hash_request_to_engine(hdev->engine, req);
 }
 
-static int stm32_hash_prepare_req(struct crypto_engine *engine,
-				  struct ahash_request *req)
+static int stm32_hash_prepare_req(struct crypto_engine *engine, void *areq)
 {
+	struct ahash_request *req = container_of(areq, struct ahash_request,
+						 base);
 	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
 	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
 	struct stm32_hash_request_ctx *rctx;
@@ -854,9 +861,10 @@ static int stm32_hash_prepare_req(struct crypto_engine *engine,
 	return stm32_hash_hw_init(hdev, rctx);
 }
 
-static int stm32_hash_one_request(struct crypto_engine *engine,
-				  struct ahash_request *req)
+static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
 {
+	struct ahash_request *req = container_of(areq, struct ahash_request,
+						 base);
 	struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
 	struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
 	struct stm32_hash_request_ctx *rctx;
@@ -1033,6 +1041,9 @@ static int stm32_hash_cra_init_algs(struct crypto_tfm *tfm,
 	if (algs_hmac_name)
 		ctx->flags |= HASH_FLAGS_HMAC;
 
+	ctx->enginectx.op.do_one_request = stm32_hash_one_request;
+	ctx->enginectx.op.prepare_request = stm32_hash_prepare_req;
+	ctx->enginectx.op.unprepare_request = NULL;
 	return 0;
 }
 
@@ -1096,6 +1107,8 @@ static irqreturn_t stm32_hash_irq_handler(int irq, void *dev_id)
 		reg &= ~HASH_SR_OUTPUT_READY;
 		stm32_hash_write(hdev, HASH_SR, reg);
 		hdev->flags |= HASH_FLAGS_OUTPUT_READY;
+		/* Disable IT*/
+		stm32_hash_write(hdev, HASH_IMR, 0);
 		return IRQ_WAKE_THREAD;
 	}
 
@@ -1404,18 +1417,19 @@ MODULE_DEVICE_TABLE(of, stm32_hash_of_match);
 static int stm32_hash_get_of_match(struct stm32_hash_dev *hdev,
 				   struct device *dev)
 {
-	int err;
-
 	hdev->pdata = of_device_get_match_data(dev);
 	if (!hdev->pdata) {
 		dev_err(dev, "no compatible OF match\n");
 		return -EINVAL;
 	}
 
-	err = of_property_read_u32(dev->of_node, "dma-maxburst",
-				   &hdev->dma_maxburst);
+	if (of_property_read_u32(dev->of_node, "dma-maxburst",
+				 &hdev->dma_maxburst)) {
+		dev_info(dev, "dma-maxburst not specified, using 0\n");
+		hdev->dma_maxburst = 0;
+	}
 
-	return err;
+	return 0;
 }
 
 static int stm32_hash_probe(struct platform_device *pdev)
@@ -1493,9 +1507,6 @@ static int stm32_hash_probe(struct platform_device *pdev)
 		goto err_engine;
 	}
 
-	hdev->engine->prepare_hash_request = stm32_hash_prepare_req;
-	hdev->engine->hash_one_request = stm32_hash_one_request;
-
 	ret = crypto_engine_start(hdev->engine);
 	if (ret)
 		goto err_engine_start;
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index 1547cbe13dc2..a81d89b3b7d8 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -451,6 +451,7 @@ static struct platform_driver sun4i_ss_driver = {
 
 module_platform_driver(sun4i_ss_driver);
 
+MODULE_ALIAS("platform:sun4i-ss");
 MODULE_DESCRIPTION("Allwinner Security System cryptographic accelerator");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Corentin LABBE <clabbe.montjoie@gmail.com>");
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6882fa2f8bad..7cebf0a6ffbc 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -104,16 +104,34 @@ static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1)
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
+static void __map_single_talitos_ptr(struct device *dev,
+				     struct talitos_ptr *ptr,
+				     unsigned int len, void *data,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	dma_addr_t dma_addr = dma_map_single_attrs(dev, data, len, dir, attrs);
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+
+	to_talitos_ptr(ptr, dma_addr, len, is_sec1);
+}
+
 static void map_single_talitos_ptr(struct device *dev,
 				   struct talitos_ptr *ptr,
 				   unsigned int len, void *data,
 				   enum dma_data_direction dir)
 {
-	dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
-	struct talitos_private *priv = dev_get_drvdata(dev);
-	bool is_sec1 = has_ftr_sec1(priv);
+	__map_single_talitos_ptr(dev, ptr, len, data, dir, 0);
+}
 
-	to_talitos_ptr(ptr, dma_addr, len, is_sec1);
+static void map_single_talitos_ptr_nosync(struct device *dev,
+					  struct talitos_ptr *ptr,
+					  unsigned int len, void *data,
+					  enum dma_data_direction dir)
+{
+	__map_single_talitos_ptr(dev, ptr, len, data, dir,
+				 DMA_ATTR_SKIP_CPU_SYNC);
 }
 
 /*
@@ -832,8 +850,6 @@ struct talitos_ctx {
 	unsigned int keylen;
 	unsigned int enckeylen;
 	unsigned int authkeylen;
-	dma_addr_t dma_buf;
-	dma_addr_t dma_hw_context;
 };
 
 #define HASH_MAX_BLOCK_SIZE		SHA512_BLOCK_SIZE
@@ -888,10 +904,12 @@ static int aead_setkey(struct crypto_aead *authenc,
 	ctx->dma_key = dma_map_single(dev, ctx->key, ctx->keylen,
 				      DMA_TO_DEVICE);
 
+	memzero_explicit(&keys, sizeof(keys));
 	return 0;
 
 badkey:
 	crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
@@ -1130,10 +1148,10 @@ next:
 	return count;
 }
 
-static int talitos_sg_map(struct device *dev, struct scatterlist *src,
-		   unsigned int len, struct talitos_edesc *edesc,
-		   struct talitos_ptr *ptr,
-		   int sg_count, unsigned int offset, int tbl_off)
+static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src,
+			      unsigned int len, struct talitos_edesc *edesc,
+			      struct talitos_ptr *ptr, int sg_count,
+			      unsigned int offset, int tbl_off, int elen)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
@@ -1142,6 +1160,7 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
 		to_talitos_ptr(ptr, 0, 0, is_sec1);
 		return 1;
 	}
+	to_talitos_ptr_ext_set(ptr, elen, is_sec1);
 	if (sg_count == 1) {
 		to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1);
 		return sg_count;
@@ -1150,7 +1169,7 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
 		to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1);
 		return sg_count;
 	}
-	sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len,
+	sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len + elen,
 					 &edesc->link_tbl[tbl_off]);
 	if (sg_count == 1) {
 		/* Only one segment now, so no link tbl needed*/
@@ -1164,6 +1183,15 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src,
 	return sg_count;
 }
 
+static int talitos_sg_map(struct device *dev, struct scatterlist *src,
+			  unsigned int len, struct talitos_edesc *edesc,
+			  struct talitos_ptr *ptr, int sg_count,
+			  unsigned int offset, int tbl_off)
+{
+	return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset,
+				  tbl_off, 0);
+}
+
 /*
  * fill in and submit ipsec_esp descriptor
  */
@@ -1181,7 +1209,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	int tbl_off = 0;
 	int sg_count, ret;
-	int sg_link_tbl_len;
+	int elen = 0;
 	bool sync_needed = false;
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	bool is_sec1 = has_ftr_sec1(priv);
@@ -1223,17 +1251,11 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	 * extent is bytes of HMAC postpended to ciphertext,
 	 * typically 12 for ipsec
 	 */
-	sg_link_tbl_len = cryptlen;
-
-	if (is_ipsec_esp) {
-		to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1);
+	if (is_ipsec_esp && (desc->hdr & DESC_HDR_MODE1_MDEU_CICV))
+		elen = authsize;
 
-		if (desc->hdr & DESC_HDR_MODE1_MDEU_CICV)
-			sg_link_tbl_len += authsize;
-	}
-
-	ret = talitos_sg_map(dev, areq->src, sg_link_tbl_len, edesc,
-			     &desc->ptr[4], sg_count, areq->assoclen, tbl_off);
+	ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4],
+				 sg_count, areq->assoclen, tbl_off, elen);
 
 	if (ret > 1) {
 		tbl_off += ret;
@@ -1404,7 +1426,6 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 
 	edesc = kmalloc(alloc_len, GFP_DMA | flags);
 	if (!edesc) {
-		dev_err(dev, "could not allocate edescriptor\n");
 		err = ERR_PTR(-ENOMEM);
 		goto error_sg;
 	}
@@ -1690,9 +1711,30 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
 				       struct ahash_request *areq)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+	struct talitos_desc *desc = &edesc->desc;
+	struct talitos_desc *desc2 = desc + 1;
+
+	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+	if (desc->next_desc &&
+	    desc->ptr[5].ptr != desc2->ptr[5].ptr)
+		unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
 
 	talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0);
 
+	/* When using hashctx-in, must unmap it. */
+	if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
+		unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
+					 DMA_TO_DEVICE);
+	else if (desc->next_desc)
+		unmap_single_talitos_ptr(dev, &desc2->ptr[1],
+					 DMA_TO_DEVICE);
+
+	if (is_sec1 && req_ctx->nbuf)
+		unmap_single_talitos_ptr(dev, &desc->ptr[3],
+					 DMA_TO_DEVICE);
+
 	if (edesc->dma_len)
 		dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
 				 DMA_BIDIRECTIONAL);
@@ -1766,8 +1808,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 
 	/* hash context in */
 	if (!req_ctx->first || req_ctx->swinit) {
-		to_talitos_ptr(&desc->ptr[1], ctx->dma_hw_context,
-			       req_ctx->hw_context_size, is_sec1);
+		map_single_talitos_ptr_nosync(dev, &desc->ptr[1],
+					      req_ctx->hw_context_size,
+					      req_ctx->hw_context,
+					      DMA_TO_DEVICE);
 		req_ctx->swinit = 0;
 	}
 	/* Indicate next op is not the first. */
@@ -1793,10 +1837,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	 * data in
 	 */
 	if (is_sec1 && req_ctx->nbuf) {
-		dma_addr_t dma_buf = ctx->dma_buf + req_ctx->buf_idx *
-						    HASH_MAX_BLOCK_SIZE;
-
-		to_talitos_ptr(&desc->ptr[3], dma_buf, req_ctx->nbuf, is_sec1);
+		map_single_talitos_ptr(dev, &desc->ptr[3], req_ctx->nbuf,
+				       req_ctx->buf[req_ctx->buf_idx],
+				       DMA_TO_DEVICE);
 	} else {
 		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
 					  &desc->ptr[3], sg_count, offset, 0);
@@ -1812,8 +1855,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 				       crypto_ahash_digestsize(tfm),
 				       areq->result, DMA_FROM_DEVICE);
 	else
-		to_talitos_ptr(&desc->ptr[5], ctx->dma_hw_context,
-			       req_ctx->hw_context_size, is_sec1);
+		map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
+					      req_ctx->hw_context_size,
+					      req_ctx->hw_context,
+					      DMA_FROM_DEVICE);
 
 	/* last DWORD empty */
 
@@ -1832,9 +1877,14 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 		desc->hdr |= DESC_HDR_MODE0_MDEU_CONT;
 		desc->hdr &= ~DESC_HDR_DONE_NOTIFY;
 
-		to_talitos_ptr(&desc2->ptr[1], ctx->dma_hw_context,
-			       req_ctx->hw_context_size, is_sec1);
-
+		if (desc->ptr[1].ptr)
+			copy_talitos_ptr(&desc2->ptr[1], &desc->ptr[1],
+					 is_sec1);
+		else
+			map_single_talitos_ptr_nosync(dev, &desc2->ptr[1],
+						      req_ctx->hw_context_size,
+						      req_ctx->hw_context,
+						      DMA_TO_DEVICE);
 		copy_talitos_ptr(&desc2->ptr[2], &desc->ptr[2], is_sec1);
 		sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc,
 					  &desc2->ptr[3], sg_count, offset, 0);
@@ -1842,8 +1892,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 			sync_needed = true;
 		copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
 		if (req_ctx->last)
-			to_talitos_ptr(&desc->ptr[5], ctx->dma_hw_context,
-				       req_ctx->hw_context_size, is_sec1);
+			map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
+						      req_ctx->hw_context_size,
+						      req_ctx->hw_context,
+						      DMA_FROM_DEVICE);
 
 		next_desc = dma_map_single(dev, &desc2->hdr1, TALITOS_DESC_SIZE,
 					   DMA_BIDIRECTIONAL);
@@ -1885,8 +1937,7 @@ static int ahash_init(struct ahash_request *areq)
 	struct device *dev = ctx->dev;
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
 	unsigned int size;
-	struct talitos_private *priv = dev_get_drvdata(dev);
-	bool is_sec1 = has_ftr_sec1(priv);
+	dma_addr_t dma;
 
 	/* Initialize the context */
 	req_ctx->buf_idx = 0;
@@ -1898,18 +1949,10 @@ static int ahash_init(struct ahash_request *areq)
 			: TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
 	req_ctx->hw_context_size = size;
 
-	if (ctx->dma_hw_context)
-		dma_unmap_single(dev, ctx->dma_hw_context, size,
-				 DMA_BIDIRECTIONAL);
-	ctx->dma_hw_context = dma_map_single(dev, req_ctx->hw_context, size,
-					     DMA_BIDIRECTIONAL);
-	if (ctx->dma_buf)
-		dma_unmap_single(dev, ctx->dma_buf, sizeof(req_ctx->buf),
-				 DMA_TO_DEVICE);
-	if (is_sec1)
-		ctx->dma_buf = dma_map_single(dev, req_ctx->buf,
-					      sizeof(req_ctx->buf),
-					      DMA_TO_DEVICE);
+	dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+			     DMA_TO_DEVICE);
+	dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
+
 	return 0;
 }
 
@@ -1920,12 +1963,6 @@ static int ahash_init(struct ahash_request *areq)
 static int ahash_init_sha224_swinit(struct ahash_request *areq)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct device *dev = ctx->dev;
-
-	ahash_init(areq);
-	req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
 
 	req_ctx->hw_context[0] = SHA224_H0;
 	req_ctx->hw_context[1] = SHA224_H1;
@@ -1940,8 +1977,8 @@ static int ahash_init_sha224_swinit(struct ahash_request *areq)
 	req_ctx->hw_context[8] = 0;
 	req_ctx->hw_context[9] = 0;
 
-	dma_sync_single_for_device(dev, ctx->dma_hw_context,
-				   req_ctx->hw_context_size, DMA_TO_DEVICE);
+	ahash_init(areq);
+	req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
 
 	return 0;
 }
@@ -2046,13 +2083,6 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 	/* request SEC to INIT hash. */
 	if (req_ctx->first && !req_ctx->swinit)
 		edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
-	if (is_sec1) {
-		dma_addr_t dma_buf = ctx->dma_buf + req_ctx->buf_idx *
-						    HASH_MAX_BLOCK_SIZE;
-
-		dma_sync_single_for_device(dev, dma_buf,
-					   req_ctx->nbuf, DMA_TO_DEVICE);
-	}
 
 	/* When the tfm context has a keylen, it's an HMAC.
 	 * A first or last (ie. not middle) descriptor must request HMAC.
@@ -2106,12 +2136,15 @@ static int ahash_export(struct ahash_request *areq, void *out)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
 	struct talitos_export_state *export = out;
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
-	struct talitos_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct device *dev = ctx->dev;
+	dma_addr_t dma;
+
+	dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+			     DMA_FROM_DEVICE);
+	dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_FROM_DEVICE);
 
-	dma_sync_single_for_cpu(dev, ctx->dma_hw_context,
-				req_ctx->hw_context_size, DMA_FROM_DEVICE);
 	memcpy(export->hw_context, req_ctx->hw_context,
 	       req_ctx->hw_context_size);
 	memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf);
@@ -2128,39 +2161,29 @@ static int ahash_import(struct ahash_request *areq, const void *in)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	const struct talitos_export_state *export = in;
-	unsigned int size;
 	struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct device *dev = ctx->dev;
-	struct talitos_private *priv = dev_get_drvdata(dev);
-	bool is_sec1 = has_ftr_sec1(priv);
+	const struct talitos_export_state *export = in;
+	unsigned int size;
+	dma_addr_t dma;
 
 	memset(req_ctx, 0, sizeof(*req_ctx));
 	size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
 			? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
 			: TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
 	req_ctx->hw_context_size = size;
-	if (ctx->dma_hw_context)
-		dma_unmap_single(dev, ctx->dma_hw_context, size,
-				 DMA_BIDIRECTIONAL);
-
 	memcpy(req_ctx->hw_context, export->hw_context, size);
-	ctx->dma_hw_context = dma_map_single(dev, req_ctx->hw_context, size,
-					     DMA_BIDIRECTIONAL);
-	if (ctx->dma_buf)
-		dma_unmap_single(dev, ctx->dma_buf, sizeof(req_ctx->buf),
-				 DMA_TO_DEVICE);
 	memcpy(req_ctx->buf[0], export->buf, export->nbuf);
-	if (is_sec1)
-		ctx->dma_buf = dma_map_single(dev, req_ctx->buf,
-					      sizeof(req_ctx->buf),
-					      DMA_TO_DEVICE);
 	req_ctx->swinit = export->swinit;
 	req_ctx->first = export->first;
 	req_ctx->last = export->last;
 	req_ctx->to_hash_later = export->to_hash_later;
 	req_ctx->nbuf = export->nbuf;
 
+	dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+			     DMA_TO_DEVICE);
+	dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
+
 	return 0;
 }
 
@@ -3064,27 +3087,6 @@ static void talitos_cra_exit(struct crypto_tfm *tfm)
 		dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
 }
 
-static void talitos_cra_exit_ahash(struct crypto_tfm *tfm)
-{
-	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct device *dev = ctx->dev;
-	unsigned int size;
-
-	talitos_cra_exit(tfm);
-
-	size = (crypto_ahash_digestsize(__crypto_ahash_cast(tfm)) <=
-		SHA256_DIGEST_SIZE)
-	       ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
-	       : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
-
-	if (ctx->dma_hw_context)
-		dma_unmap_single(dev, ctx->dma_hw_context, size,
-				 DMA_BIDIRECTIONAL);
-	if (ctx->dma_buf)
-		dma_unmap_single(dev, ctx->dma_buf, HASH_MAX_BLOCK_SIZE * 2,
-				 DMA_TO_DEVICE);
-}
-
 /*
  * given the alg's descriptor header template, determine whether descriptor
  * type and primary/secondary execution units required match the hw
@@ -3183,7 +3185,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 	case CRYPTO_ALG_TYPE_AHASH:
 		alg = &t_alg->algt.alg.hash.halg.base;
 		alg->cra_init = talitos_cra_init_ahash;
-		alg->cra_exit = talitos_cra_exit_ahash;
+		alg->cra_exit = talitos_cra_exit;
 		alg->cra_type = &crypto_ahash_type;
 		t_alg->algt.alg.hash.init = ahash_init;
 		t_alg->algt.alg.hash.update = ahash_update;
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index 765f53e548ab..cb31b59c9d53 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1404,9 +1404,8 @@ static void cryp_algs_unregister_all(void)
 static int ux500_cryp_probe(struct platform_device *pdev)
 {
 	int ret;
-	int cryp_error = 0;
-	struct resource *res = NULL;
-	struct resource *res_irq = NULL;
+	struct resource *res;
+	struct resource *res_irq;
 	struct cryp_device_data *device_data;
 	struct cryp_protection_config prot = {
 		.privilege_access = CRYP_STATE_ENABLE
@@ -1416,7 +1415,6 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 	dev_dbg(dev, "[%s]", __func__);
 	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_ATOMIC);
 	if (!device_data) {
-		dev_err(dev, "[%s]: kzalloc() failed!", __func__);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1479,15 +1477,13 @@ static int ux500_cryp_probe(struct platform_device *pdev)
 		goto out_clk_unprepare;
 	}
 
-	cryp_error = cryp_check(device_data);
-	if (cryp_error != 0) {
-		dev_err(dev, "[%s]: cryp_init() failed!", __func__);
+	if (cryp_check(device_data)) {
+		dev_err(dev, "[%s]: cryp_check() failed!", __func__);
 		ret = -EINVAL;
 		goto out_power;
 	}
 
-	cryp_error = cryp_configure_protection(device_data, &prot);
-	if (cryp_error != 0) {
+	if (cryp_configure_protection(device_data, &prot)) {
 		dev_err(dev, "[%s]: cryp_configure_protection() failed!",
 			__func__);
 		ret = -EINVAL;
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 9acccad26928..2d0a677bcc76 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1403,6 +1403,16 @@ out:
 	return ret1 ? ret1 : ret2;
 }
 
+static int ahash_noimport(struct ahash_request *req, const void *in)
+{
+	return -ENOSYS;
+}
+
+static int ahash_noexport(struct ahash_request *req, void *out)
+{
+	return -ENOSYS;
+}
+
 static int hmac_sha1_init(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -1507,6 +1517,8 @@ static struct hash_algo_template hash_algs[] = {
 			.update = ahash_update,
 			.final = ahash_final,
 			.digest = ahash_sha1_digest,
+			.export = ahash_noexport,
+			.import = ahash_noimport,
 			.halg.digestsize = SHA1_DIGEST_SIZE,
 			.halg.statesize = sizeof(struct hash_ctx),
 			.halg.base = {
@@ -1529,6 +1541,8 @@ static struct hash_algo_template hash_algs[] = {
 			.update	= ahash_update,
 			.final = ahash_final,
 			.digest = ahash_sha256_digest,
+			.export = ahash_noexport,
+			.import = ahash_noimport,
 			.halg.digestsize = SHA256_DIGEST_SIZE,
 			.halg.statesize = sizeof(struct hash_ctx),
 			.halg.base = {
@@ -1553,6 +1567,8 @@ static struct hash_algo_template hash_algs[] = {
 			.final = ahash_final,
 			.digest = hmac_sha1_digest,
 			.setkey = hmac_sha1_setkey,
+			.export = ahash_noexport,
+			.import = ahash_noimport,
 			.halg.digestsize = SHA1_DIGEST_SIZE,
 			.halg.statesize = sizeof(struct hash_ctx),
 			.halg.base = {
@@ -1577,6 +1593,8 @@ static struct hash_algo_template hash_algs[] = {
 			.final = ahash_final,
 			.digest = hmac_sha256_digest,
 			.setkey = hmac_sha256_setkey,
+			.export = ahash_noexport,
+			.import = ahash_noimport,
 			.halg.digestsize = SHA256_DIGEST_SIZE,
 			.halg.statesize = sizeof(struct hash_ctx),
 			.halg.base = {
diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig
index 5db07495ddc5..a4324b1383a4 100644
--- a/drivers/crypto/virtio/Kconfig
+++ b/drivers/crypto/virtio/Kconfig
@@ -2,7 +2,6 @@ config CRYPTO_DEV_VIRTIO
 	tristate "VirtIO crypto driver"
 	depends on VIRTIO
 	select CRYPTO_AEAD
-	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_ENGINE
 	default m
diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
index abe8c15450df..ba190cfa7aa1 100644
--- a/drivers/crypto/virtio/virtio_crypto_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_algs.c
@@ -29,6 +29,7 @@
 
 
 struct virtio_crypto_ablkcipher_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct virtio_crypto *vcrypto;
 	struct crypto_tfm *tfm;
 
@@ -491,7 +492,7 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
 	vc_sym_req->ablkcipher_req = req;
 	vc_sym_req->encrypt = true;
 
-	return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
+	return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
 }
 
 static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
@@ -511,7 +512,7 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
 	vc_sym_req->ablkcipher_req = req;
 	vc_sym_req->encrypt = false;
 
-	return crypto_transfer_cipher_request_to_engine(data_vq->engine, req);
+	return crypto_transfer_ablkcipher_request_to_engine(data_vq->engine, req);
 }
 
 static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
@@ -521,6 +522,9 @@ static int virtio_crypto_ablkcipher_init(struct crypto_tfm *tfm)
 	tfm->crt_ablkcipher.reqsize = sizeof(struct virtio_crypto_sym_request);
 	ctx->tfm = tfm;
 
+	ctx->enginectx.op.do_one_request = virtio_crypto_ablkcipher_crypt_req;
+	ctx->enginectx.op.prepare_request = NULL;
+	ctx->enginectx.op.unprepare_request = NULL;
 	return 0;
 }
 
@@ -538,9 +542,9 @@ static void virtio_crypto_ablkcipher_exit(struct crypto_tfm *tfm)
 }
 
 int virtio_crypto_ablkcipher_crypt_req(
-	struct crypto_engine *engine,
-	struct ablkcipher_request *req)
+	struct crypto_engine *engine, void *vreq)
 {
+	struct ablkcipher_request *req = container_of(vreq, struct ablkcipher_request, base);
 	struct virtio_crypto_sym_request *vc_sym_req =
 				ablkcipher_request_ctx(req);
 	struct virtio_crypto_request *vc_req = &vc_sym_req->base;
@@ -561,8 +565,8 @@ static void virtio_crypto_ablkcipher_finalize_req(
 	struct ablkcipher_request *req,
 	int err)
 {
-	crypto_finalize_cipher_request(vc_sym_req->base.dataq->engine,
-					req, err);
+	crypto_finalize_ablkcipher_request(vc_sym_req->base.dataq->engine,
+					   req, err);
 	kzfree(vc_sym_req->iv);
 	virtcrypto_clear_request(&vc_sym_req->base);
 }
diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h
index e976539a05d9..66501a5a2b7b 100644
--- a/drivers/crypto/virtio/virtio_crypto_common.h
+++ b/drivers/crypto/virtio/virtio_crypto_common.h
@@ -24,7 +24,6 @@
 #include <linux/spinlock.h>
 #include <crypto/aead.h>
 #include <crypto/aes.h>
-#include <crypto/authenc.h>
 #include <crypto/engine.h>
 
 
@@ -107,8 +106,7 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node);
 int virtcrypto_dev_start(struct virtio_crypto *vcrypto);
 void virtcrypto_dev_stop(struct virtio_crypto *vcrypto);
 int virtio_crypto_ablkcipher_crypt_req(
-	struct crypto_engine *engine,
-	struct ablkcipher_request *req);
+	struct crypto_engine *engine, void *vreq);
 
 void
 virtcrypto_clear_request(struct virtio_crypto_request *vc_req);
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index ff1410a32c2b..83326986c113 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -111,9 +111,6 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi)
 			ret = -ENOMEM;
 			goto err_engine;
 		}
-
-		vi->data_vq[i].engine->cipher_one_request =
-			virtio_crypto_ablkcipher_crypt_req;
 	}
 
 	kfree(names);
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c81c79d01d93..46115a392098 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -31,7 +31,6 @@
 enum evdev_clock_type {
 	EV_CLK_REAL = 0,
 	EV_CLK_MONO,
-	EV_CLK_BOOT,
 	EV_CLK_MAX
 };
 
@@ -198,12 +197,10 @@ static int evdev_set_clk_type(struct evdev_client *client, unsigned int clkid)
 	case CLOCK_REALTIME:
 		clk_type = EV_CLK_REAL;
 		break;
+	case CLOCK_BOOTTIME:
 	case CLOCK_MONOTONIC:
 		clk_type = EV_CLK_MONO;
 		break;
-	case CLOCK_BOOTTIME:
-		clk_type = EV_CLK_BOOT;
-		break;
 	default:
 		return -EINVAL;
 	}
@@ -314,8 +311,6 @@ static void evdev_events(struct input_handle *handle,
 
 	ev_time[EV_CLK_MONO] = ktime_get();
 	ev_time[EV_CLK_REAL] = ktime_mono_to_real(ev_time[EV_CLK_MONO]);
-	ev_time[EV_CLK_BOOT] = ktime_mono_to_any(ev_time[EV_CLK_MONO],
-						 TK_OFFS_BOOT);
 
 	rcu_read_lock();
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index d913aec85109..60d5982d8234 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -286,6 +286,11 @@ config IRQ_MXS
 	select IRQ_DOMAIN
 	select STMP_DEVICE
 
+config MSCC_OCELOT_IRQ
+	bool
+	select IRQ_DOMAIN
+	select GENERIC_IRQ_CHIP
+
 config MVEBU_GICP
 	bool
 
@@ -351,4 +356,13 @@ config GOLDFISH_PIC
          Say yes here to enable Goldfish interrupt controller driver used
          for Goldfish based virtual platforms.
 
+config QCOM_PDC
+	bool "QCOM PDC"
+	depends on ARCH_QCOM
+	select IRQ_DOMAIN
+	select IRQ_DOMAIN_HIERARCHY
+	help
+	  Power Domain Controller driver to manage and configure wakeup
+	  IRQs for Qualcomm Technologies Inc (QTI) mobile chips.
+
 endmenu
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 110de02bb215..280884068959 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_ARCH_SA1100)		+= irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)		+= irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)			+= irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)		+= irq-pic32-evic.o
+obj-$(CONFIG_MSCC_OCELOT_IRQ)		+= irq-mscc-ocelot.o
 obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
 obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
@@ -84,3 +85,4 @@ obj-$(CONFIG_ARCH_SYNQUACER)		+= irq-sni-exiu.o
 obj-$(CONFIG_MESON_IRQ_GPIO)		+= irq-meson-gpio.o
 obj-$(CONFIG_GOLDFISH_PIC) 		+= irq-goldfish-pic.o
 obj-$(CONFIG_NDS32)			+= irq-ativic32.o
+obj-$(CONFIG_QCOM_PDC)			+= qcom-pdc.o
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 30017df5b54c..01e673c680cd 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,8 @@
 
 #include "irq-gic-common.h"
 
+static DEFINE_RAW_SPINLOCK(irq_controller_lock);
+
 static const struct gic_kvm_info *gic_kvm_info;
 
 const struct gic_kvm_info *gic_get_kvm_info(void)
@@ -53,11 +55,13 @@ int gic_configure_irq(unsigned int irq, unsigned int type,
 	u32 confoff = (irq / 16) * 4;
 	u32 val, oldval;
 	int ret = 0;
+	unsigned long flags;
 
 	/*
 	 * Read current configuration register, and insert the config
 	 * for "irq", depending on "type".
 	 */
+	raw_spin_lock_irqsave(&irq_controller_lock, flags);
 	val = oldval = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
 	if (type & IRQ_TYPE_LEVEL_MASK)
 		val &= ~confmask;
@@ -65,8 +69,10 @@ int gic_configure_irq(unsigned int irq, unsigned int type,
 		val |= confmask;
 
 	/* If the current configuration is the same, then we are done */
-	if (val == oldval)
+	if (val == oldval) {
+		raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 		return 0;
+	}
 
 	/*
 	 * Write back the new configuration, and possibly re-enable
@@ -84,6 +90,7 @@ int gic_configure_irq(unsigned int irq, unsigned int type,
 			pr_warn("GIC: PPI%d is secure or misconfigured\n",
 				irq - 16);
 	}
+	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
 	if (sync_access)
 		sync_access();
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2cbb19cddbf8..2982e93d2369 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -33,6 +33,7 @@
 #include <linux/of_platform.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/syscore_ops.h>
 
 #include <linux/irqchip.h>
 #include <linux/irqchip/arm-gic-v3.h>
@@ -46,6 +47,7 @@
 #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING		(1ULL << 0)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_22375	(1ULL << 1)
 #define ITS_FLAGS_WORKAROUND_CAVIUM_23144	(1ULL << 2)
+#define ITS_FLAGS_SAVE_SUSPEND_STATE		(1ULL << 3)
 
 #define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING	(1 << 0)
 
@@ -101,6 +103,8 @@ struct its_node {
 	struct its_collection	*collections;
 	struct fwnode_handle	*fwnode_handle;
 	u64			(*get_msi_base)(struct its_device *its_dev);
+	u64			cbaser_save;
+	u32			ctlr_save;
 	struct list_head	its_device_list;
 	u64			flags;
 	unsigned long		list_nr;
@@ -1875,16 +1879,6 @@ static void its_cpu_init_lpis(void)
 		gic_data_rdist()->pend_page = pend_page;
 	}
 
-	/* Disable LPIs */
-	val = readl_relaxed(rbase + GICR_CTLR);
-	val &= ~GICR_CTLR_ENABLE_LPIS;
-	writel_relaxed(val, rbase + GICR_CTLR);
-
-	/*
-	 * Make sure any change to the table is observable by the GIC.
-	 */
-	dsb(sy);
-
 	/* set PROPBASE */
 	val = (page_to_phys(gic_rdists->prop_page) |
 	       GICR_PROPBASER_InnerShareable |
@@ -1938,52 +1932,53 @@ static void its_cpu_init_lpis(void)
 	dsb(sy);
 }
 
-static void its_cpu_init_collection(void)
+static void its_cpu_init_collection(struct its_node *its)
 {
-	struct its_node *its;
-	int cpu;
-
-	spin_lock(&its_lock);
-	cpu = smp_processor_id();
-
-	list_for_each_entry(its, &its_nodes, entry) {
-		u64 target;
+	int cpu = smp_processor_id();
+	u64 target;
 
-		/* avoid cross node collections and its mapping */
-		if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
-			struct device_node *cpu_node;
+	/* avoid cross node collections and its mapping */
+	if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) {
+		struct device_node *cpu_node;
 
-			cpu_node = of_get_cpu_node(cpu, NULL);
-			if (its->numa_node != NUMA_NO_NODE &&
-				its->numa_node != of_node_to_nid(cpu_node))
-				continue;
-		}
+		cpu_node = of_get_cpu_node(cpu, NULL);
+		if (its->numa_node != NUMA_NO_NODE &&
+			its->numa_node != of_node_to_nid(cpu_node))
+			return;
+	}
 
+	/*
+	 * We now have to bind each collection to its target
+	 * redistributor.
+	 */
+	if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
 		/*
-		 * We now have to bind each collection to its target
+		 * This ITS wants the physical address of the
 		 * redistributor.
 		 */
-		if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
-			/*
-			 * This ITS wants the physical address of the
-			 * redistributor.
-			 */
-			target = gic_data_rdist()->phys_base;
-		} else {
-			/*
-			 * This ITS wants a linear CPU number.
-			 */
-			target = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
-			target = GICR_TYPER_CPU_NUMBER(target) << 16;
-		}
+		target = gic_data_rdist()->phys_base;
+	} else {
+		/* This ITS wants a linear CPU number. */
+		target = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
+		target = GICR_TYPER_CPU_NUMBER(target) << 16;
+	}
 
-		/* Perform collection mapping */
-		its->collections[cpu].target_address = target;
-		its->collections[cpu].col_id = cpu;
+	/* Perform collection mapping */
+	its->collections[cpu].target_address = target;
+	its->collections[cpu].col_id = cpu;
 
-		its_send_mapc(its, &its->collections[cpu], 1);
-		its_send_invall(its, &its->collections[cpu]);
-	}
+	its_send_mapc(its, &its->collections[cpu], 1);
+	its_send_invall(its, &its->collections[cpu]);
+}
+
+static void its_cpu_init_collections(void)
+{
+	struct its_node *its;
+
+	spin_lock(&its_lock);
+
+	list_for_each_entry(its, &its_nodes, entry)
+		its_cpu_init_collection(its);
 
 	spin_unlock(&its_lock);
 }
@@ -3041,6 +3036,113 @@ static void its_enable_quirks(struct its_node *its)
 	gic_enable_quirks(iidr, its_quirks, its);
 }
 
+static int its_save_disable(void)
+{
+	struct its_node *its;
+	int err = 0;
+
+	spin_lock(&its_lock);
+	list_for_each_entry(its, &its_nodes, entry) {
+		void __iomem *base;
+
+		if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
+			continue;
+
+		base = its->base;
+		its->ctlr_save = readl_relaxed(base + GITS_CTLR);
+		err = its_force_quiescent(base);
+		if (err) {
+			pr_err("ITS@%pa: failed to quiesce: %d\n",
+			       &its->phys_base, err);
+			writel_relaxed(its->ctlr_save, base + GITS_CTLR);
+			goto err;
+		}
+
+		its->cbaser_save = gits_read_cbaser(base + GITS_CBASER);
+	}
+
+err:
+	if (err) {
+		list_for_each_entry_continue_reverse(its, &its_nodes, entry) {
+			void __iomem *base;
+
+			if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
+				continue;
+
+			base = its->base;
+			writel_relaxed(its->ctlr_save, base + GITS_CTLR);
+		}
+	}
+	spin_unlock(&its_lock);
+
+	return err;
+}
+
+static void its_restore_enable(void)
+{
+	struct its_node *its;
+	int ret;
+
+	spin_lock(&its_lock);
+	list_for_each_entry(its, &its_nodes, entry) {
+		void __iomem *base;
+		int i;
+
+		if (!(its->flags & ITS_FLAGS_SAVE_SUSPEND_STATE))
+			continue;
+
+		base = its->base;
+
+		/*
+		 * Make sure that the ITS is disabled. If it fails to quiesce,
+		 * don't restore it since writing to CBASER or BASER<n>
+		 * registers is undefined according to the GIC v3 ITS
+		 * Specification.
+		 */
+		ret = its_force_quiescent(base);
+		if (ret) {
+			pr_err("ITS@%pa: failed to quiesce on resume: %d\n",
+			       &its->phys_base, ret);
+			continue;
+		}
+
+		gits_write_cbaser(its->cbaser_save, base + GITS_CBASER);
+
+		/*
+		 * Writing CBASER resets CREADR to 0, so make CWRITER and
+		 * cmd_write line up with it.
+		 */
+		its->cmd_write = its->cmd_base;
+		gits_write_cwriter(0, base + GITS_CWRITER);
+
+		/* Restore GITS_BASER from the value cache. */
+		for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+			struct its_baser *baser = &its->tables[i];
+
+			if (!(baser->val & GITS_BASER_VALID))
+				continue;
+
+			its_write_baser(its, baser, baser->val);
+		}
+		writel_relaxed(its->ctlr_save, base + GITS_CTLR);
+
+		/*
+		 * Reinit the collection if it's stored in the ITS. This is
+		 * indicated by the col_id being less than the HCC field.
+		 * CID < HCC as specified in the GIC v3 Documentation.
+		 */
+		if (its->collections[smp_processor_id()].col_id <
+		    GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER)))
+			its_cpu_init_collection(its);
+	}
+	spin_unlock(&its_lock);
+}
+
+static struct syscore_ops its_syscore_ops = {
+	.suspend = its_save_disable,
+	.resume = its_restore_enable,
+};
+
 static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
 {
 	struct irq_domain *inner_domain;
@@ -3260,6 +3362,9 @@ static int __init its_probe_one(struct resource *res,
 		ctlr |= GITS_CTLR_ImDe;
 	writel_relaxed(ctlr, its->base + GITS_CTLR);
 
+	if (GITS_TYPER_HCC(typer))
+		its->flags |= ITS_FLAGS_SAVE_SUSPEND_STATE;
+
 	err = its_init_domain(handle, its);
 	if (err)
 		goto out_free_tables;
@@ -3287,15 +3392,71 @@ static bool gic_rdists_supports_plpis(void)
 	return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
 }
 
+static int redist_disable_lpis(void)
+{
+	void __iomem *rbase = gic_data_rdist_rd_base();
+	u64 timeout = USEC_PER_SEC;
+	u64 val;
+
+	if (!gic_rdists_supports_plpis()) {
+		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+		return -ENXIO;
+	}
+
+	val = readl_relaxed(rbase + GICR_CTLR);
+	if (!(val & GICR_CTLR_ENABLE_LPIS))
+		return 0;
+
+	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
+		smp_processor_id());
+	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+
+	/* Disable LPIs */
+	val &= ~GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/* Make sure any change to GICR_CTLR is observable by the GIC */
+	dsb(sy);
+
+	/*
+	 * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs
+	 * from 1 to 0 before programming GICR_PEND{PROP}BASER registers.
+	 * Error out if we time out waiting for RWP to clear.
+	 */
+	while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) {
+		if (!timeout) {
+			pr_err("CPU%d: Timeout while disabling LPIs\n",
+			       smp_processor_id());
+			return -ETIMEDOUT;
+		}
+		udelay(1);
+		timeout--;
+	}
+
+	/*
+	 * After it has been written to 1, it is IMPLEMENTATION
+	 * DEFINED whether GICR_CTLR.EnableLPI becomes RES1 or can be
+	 * cleared to 0. Error out if clearing the bit failed.
+	 */
+	if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) {
+		pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id());
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 int its_cpu_init(void)
 {
 	if (!list_empty(&its_nodes)) {
-		if (!gic_rdists_supports_plpis()) {
-			pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
-			return -ENXIO;
-		}
+		int ret;
+
+		ret = redist_disable_lpis();
+		if (ret)
+			return ret;
+
 		its_cpu_init_lpis();
-		its_cpu_init_collection();
+		its_cpu_init_collections();
 	}
 
 	return 0;
@@ -3516,5 +3677,7 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 		}
 	}
 
+	register_syscore_ops(&its_syscore_ops);
+
 	return 0;
 }
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d99cc07903ec..e5d101418390 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -61,7 +61,7 @@ struct gic_chip_data {
 };
 
 static struct gic_chip_data gic_data __read_mostly;
-static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
 static struct gic_kvm_info gic_v3_kvm_info;
 static DEFINE_PER_CPU(bool, has_rss);
@@ -354,7 +354,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 		if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
 			int err;
 
-			if (static_key_true(&supports_deactivate))
+			if (static_branch_likely(&supports_deactivate_key))
 				gic_write_eoir(irqnr);
 			else
 				isb();
@@ -362,7 +362,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 			err = handle_domain_irq(gic_data.domain, irqnr, regs);
 			if (err) {
 				WARN_ONCE(true, "Unexpected interrupt received!\n");
-				if (static_key_true(&supports_deactivate)) {
+				if (static_branch_likely(&supports_deactivate_key)) {
 					if (irqnr < 8192)
 						gic_write_dir(irqnr);
 				} else {
@@ -373,7 +373,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 		}
 		if (irqnr < 16) {
 			gic_write_eoir(irqnr);
-			if (static_key_true(&supports_deactivate))
+			if (static_branch_likely(&supports_deactivate_key))
 				gic_write_dir(irqnr);
 #ifdef CONFIG_SMP
 			/*
@@ -532,6 +532,8 @@ static void gic_cpu_sys_reg_init(void)
 	int i, cpu = smp_processor_id();
 	u64 mpidr = cpu_logical_map(cpu);
 	u64 need_rss = MPIDR_RS(mpidr);
+	bool group0;
+	u32 val, pribits;
 
 	/*
 	 * Need to check that the SRE bit has actually been set. If
@@ -543,8 +545,28 @@ static void gic_cpu_sys_reg_init(void)
 	if (!gic_enable_sre())
 		pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n");
 
+	pribits = gic_read_ctlr();
+	pribits &= ICC_CTLR_EL1_PRI_BITS_MASK;
+	pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT;
+	pribits++;
+
+	/*
+	 * Let's find out if Group0 is under control of EL3 or not by
+	 * setting the highest possible, non-zero priority in PMR.
+	 *
+	 * If SCR_EL3.FIQ is set, the priority gets shifted down in
+	 * order for the CPU interface to set bit 7, and keep the
+	 * actual priority in the non-secure range. In the process, it
+	 * looses the least significant bit and the actual priority
+	 * becomes 0x80. Reading it back returns 0, indicating that
+	 * we're don't have access to Group0.
+	 */
+	write_gicreg(BIT(8 - pribits), ICC_PMR_EL1);
+	val = read_gicreg(ICC_PMR_EL1);
+	group0 = val != 0;
+
 	/* Set priority mask register */
-	gic_write_pmr(DEFAULT_PMR_VALUE);
+	write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1);
 
 	/*
 	 * Some firmwares hand over to the kernel with the BPR changed from
@@ -554,7 +576,7 @@ static void gic_cpu_sys_reg_init(void)
 	 */
 	gic_write_bpr1(0);
 
-	if (static_key_true(&supports_deactivate)) {
+	if (static_branch_likely(&supports_deactivate_key)) {
 		/* EOI drops priority only (mode 1) */
 		gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
 	} else {
@@ -562,6 +584,37 @@ static void gic_cpu_sys_reg_init(void)
 		gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir);
 	}
 
+	/* Always whack Group0 before Group1 */
+	if (group0) {
+		switch(pribits) {
+		case 8:
+		case 7:
+			write_gicreg(0, ICC_AP0R3_EL1);
+			write_gicreg(0, ICC_AP0R2_EL1);
+		case 6:
+			write_gicreg(0, ICC_AP0R1_EL1);
+		case 5:
+		case 4:
+			write_gicreg(0, ICC_AP0R0_EL1);
+		}
+
+		isb();
+	}
+
+	switch(pribits) {
+	case 8:
+	case 7:
+		write_gicreg(0, ICC_AP1R3_EL1);
+		write_gicreg(0, ICC_AP1R2_EL1);
+	case 6:
+		write_gicreg(0, ICC_AP1R1_EL1);
+	case 5:
+	case 4:
+		write_gicreg(0, ICC_AP1R0_EL1);
+	}
+
+	isb();
+
 	/* ... and let's hit the road... */
 	gic_write_grpen1(1);
 
@@ -590,9 +643,17 @@ static void gic_cpu_sys_reg_init(void)
 		pr_crit_once("RSS is required but GICD doesn't support it\n");
 }
 
+static bool gicv3_nolpi;
+
+static int __init gicv3_nolpi_cfg(char *buf)
+{
+	return strtobool(buf, &gicv3_nolpi);
+}
+early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg);
+
 static int gic_dist_supports_lpis(void)
 {
-	return !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS);
+	return !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) && !gicv3_nolpi;
 }
 
 static void gic_cpu_init(void)
@@ -823,7 +884,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 {
 	struct irq_chip *chip = &gic_chip;
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		chip = &gic_eoimode1_chip;
 
 	/* SGIs are private to the core kernel */
@@ -861,6 +922,8 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
+#define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
+
 static int gic_irq_domain_translate(struct irq_domain *d,
 				    struct irq_fwspec *fwspec,
 				    unsigned long *hwirq,
@@ -875,6 +938,7 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 			*hwirq = fwspec->param[1] + 32;
 			break;
 		case 1:			/* PPI */
+		case GIC_IRQ_TYPE_PARTITION:
 			*hwirq = fwspec->param[1] + 16;
 			break;
 		case GIC_IRQ_TYPE_LPI:	/* LPI */
@@ -885,6 +949,13 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 		}
 
 		*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+
+		/*
+		 * Make it clear that broken DTs are... broken.
+		 * Partitionned PPIs are an unfortunate exception.
+		 */
+		WARN_ON(*type == IRQ_TYPE_NONE &&
+			fwspec->param[0] != GIC_IRQ_TYPE_PARTITION);
 		return 0;
 	}
 
@@ -894,6 +965,8 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 
 		*hwirq = fwspec->param[0];
 		*type = fwspec->param[1];
+
+		WARN_ON(*type == IRQ_TYPE_NONE);
 		return 0;
 	}
 
@@ -1002,9 +1075,9 @@ static int __init gic_init_bases(void __iomem *dist_base,
 	int err;
 
 	if (!is_hyp_mode_available())
-		static_key_slow_dec(&supports_deactivate);
+		static_branch_disable(&supports_deactivate_key);
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		pr_info("GIC: Using split EOI/Deactivate mode\n");
 
 	gic_data.fwnode = handle;
@@ -1140,7 +1213,7 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 			.fwnode		= gic_data.fwnode,
 			.param_count	= 3,
 			.param		= {
-				[0]	= 1,
+				[0]	= GIC_IRQ_TYPE_PARTITION,
 				[1]	= i,
 				[2]	= IRQ_TYPE_NONE,
 			},
@@ -1239,7 +1312,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
 
 	gic_populate_ppi_partitions(node);
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		gic_of_setup_kvm_info(node);
 	return 0;
 
@@ -1541,7 +1614,7 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
 
 	acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		gic_acpi_setup_kvm_info();
 
 	return 0;
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 121af5cf688f..ced10c44b68a 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -121,7 +121,7 @@ static DEFINE_RAW_SPINLOCK(cpu_map_lock);
 #define NR_GIC_CPU_IF 8
 static u8 gic_cpu_map[NR_GIC_CPU_IF] __read_mostly;
 
-static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
+static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
 
@@ -361,7 +361,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 		irqnr = irqstat & GICC_IAR_INT_ID_MASK;
 
 		if (likely(irqnr > 15 && irqnr < 1020)) {
-			if (static_key_true(&supports_deactivate))
+			if (static_branch_likely(&supports_deactivate_key))
 				writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
 			isb();
 			handle_domain_irq(gic->domain, irqnr, regs);
@@ -369,7 +369,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 		}
 		if (irqnr < 16) {
 			writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
-			if (static_key_true(&supports_deactivate))
+			if (static_branch_likely(&supports_deactivate_key))
 				writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
 #ifdef CONFIG_SMP
 			/*
@@ -453,15 +453,26 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic)
 	return mask;
 }
 
+static bool gic_check_gicv2(void __iomem *base)
+{
+	u32 val = readl_relaxed(base + GIC_CPU_IDENT);
+	return (val & 0xff0fff) == 0x02043B;
+}
+
 static void gic_cpu_if_up(struct gic_chip_data *gic)
 {
 	void __iomem *cpu_base = gic_data_cpu_base(gic);
 	u32 bypass = 0;
 	u32 mode = 0;
+	int i;
 
-	if (gic == &gic_data[0] && static_key_true(&supports_deactivate))
+	if (gic == &gic_data[0] && static_branch_likely(&supports_deactivate_key))
 		mode = GIC_CPU_CTRL_EOImodeNS;
 
+	if (gic_check_gicv2(cpu_base))
+		for (i = 0; i < 4; i++)
+			writel_relaxed(0, cpu_base + GIC_CPU_ACTIVEPRIO + i * 4);
+
 	/*
 	* Preserve bypass disable bits to be written back later
 	*/
@@ -1000,6 +1011,9 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 			*hwirq += 16;
 
 		*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+
+		/* Make it clear that broken DTs are... broken */
+		WARN_ON(*type == IRQ_TYPE_NONE);
 		return 0;
 	}
 
@@ -1009,6 +1023,8 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 
 		*hwirq = fwspec->param[0];
 		*type = fwspec->param[1];
+
+		WARN_ON(*type == IRQ_TYPE_NONE);
 		return 0;
 	}
 
@@ -1203,11 +1219,11 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
 					  "irqchip/arm/gic:starting",
 					  gic_starting_cpu, NULL);
 		set_handle_irq(gic_handle_irq);
-		if (static_key_true(&supports_deactivate))
+		if (static_branch_likely(&supports_deactivate_key))
 			pr_info("GIC: Using split EOI/Deactivate mode\n");
 	}
 
-	if (static_key_true(&supports_deactivate) && gic == &gic_data[0]) {
+	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
 		name = kasprintf(GFP_KERNEL, "GICv2");
 		gic_init_chip(gic, NULL, name, true);
 	} else {
@@ -1234,7 +1250,7 @@ void __init gic_init(unsigned int gic_nr, int irq_start,
 	 * Non-DT/ACPI systems won't run a hypervisor, so let's not
 	 * bother with these...
 	 */
-	static_key_slow_dec(&supports_deactivate);
+	static_branch_disable(&supports_deactivate_key);
 
 	gic = &gic_data[gic_nr];
 	gic->raw_dist_base = dist_base;
@@ -1264,12 +1280,6 @@ static int __init gicv2_force_probe_cfg(char *buf)
 }
 early_param("irqchip.gicv2_force_probe", gicv2_force_probe_cfg);
 
-static bool gic_check_gicv2(void __iomem *base)
-{
-	u32 val = readl_relaxed(base + GIC_CPU_IDENT);
-	return (val & 0xff0fff) == 0x02043B;
-}
-
 static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
 {
 	struct resource cpuif_res;
@@ -1420,7 +1430,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 	if (ret)
 		return;
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		gic_set_kvm_info(&gic_v2_kvm_info);
 }
 
@@ -1447,7 +1457,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
 	 * or the CPU interface is too small.
 	 */
 	if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base))
-		static_key_slow_dec(&supports_deactivate);
+		static_branch_disable(&supports_deactivate_key);
 
 	ret = __gic_init_bases(gic, -1, &node->fwnode);
 	if (ret) {
@@ -1628,7 +1638,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
 	 * interface will always be the right size.
 	 */
 	if (!is_hyp_mode_available())
-		static_key_slow_dec(&supports_deactivate);
+		static_branch_disable(&supports_deactivate_key);
 
 	/*
 	 * Initialize GIC instance zero (no multi-GIC support).
@@ -1653,7 +1663,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
 	if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
 		gicv2m_init(NULL, gic_data[0].domain);
 
-	if (static_key_true(&supports_deactivate))
+	if (static_branch_likely(&supports_deactivate_key))
 		gic_acpi_setup_kvm_info();
 
 	return 0;
diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c
new file mode 100644
index 000000000000..b63e40c00a02
--- /dev/null
+++ b/drivers/irqchip/irq-mscc-ocelot.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi Ocelot IRQ controller driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/interrupt.h>
+
+#define ICPU_CFG_INTR_INTR_STICKY	0x10
+#define ICPU_CFG_INTR_INTR_ENA		0x18
+#define ICPU_CFG_INTR_INTR_ENA_CLR	0x1c
+#define ICPU_CFG_INTR_INTR_ENA_SET	0x20
+#define ICPU_CFG_INTR_DST_INTR_IDENT(x)	(0x38 + 0x4 * (x))
+#define ICPU_CFG_INTR_INTR_TRIGGER(x)	(0x5c + 0x4 * (x))
+
+#define OCELOT_NR_IRQ 24
+
+static void ocelot_irq_unmask(struct irq_data *data)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+	struct irq_chip_type *ct = irq_data_get_chip_type(data);
+	unsigned int mask = data->mask;
+	u32 val;
+
+	irq_gc_lock(gc);
+	val = irq_reg_readl(gc, ICPU_CFG_INTR_INTR_TRIGGER(0)) |
+	      irq_reg_readl(gc, ICPU_CFG_INTR_INTR_TRIGGER(1));
+	if (!(val & mask))
+		irq_reg_writel(gc, mask, ICPU_CFG_INTR_INTR_STICKY);
+
+	*ct->mask_cache &= ~mask;
+	irq_reg_writel(gc, mask, ICPU_CFG_INTR_INTR_ENA_SET);
+	irq_gc_unlock(gc);
+}
+
+static void ocelot_irq_handler(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct irq_domain *d = irq_desc_get_handler_data(desc);
+	struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, 0);
+	u32 reg = irq_reg_readl(gc, ICPU_CFG_INTR_DST_INTR_IDENT(0));
+
+	chained_irq_enter(chip, desc);
+
+	while (reg) {
+		u32 hwirq = __fls(reg);
+
+		generic_handle_irq(irq_find_mapping(d, hwirq));
+		reg &= ~(BIT(hwirq));
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int __init ocelot_irq_init(struct device_node *node,
+				  struct device_node *parent)
+{
+	struct irq_domain *domain;
+	struct irq_chip_generic *gc;
+	int parent_irq, ret;
+
+	parent_irq = irq_of_parse_and_map(node, 0);
+	if (!parent_irq)
+		return -EINVAL;
+
+	domain = irq_domain_add_linear(node, OCELOT_NR_IRQ,
+				       &irq_generic_chip_ops, NULL);
+	if (!domain) {
+		pr_err("%s: unable to add irq domain\n", node->name);
+		return -ENOMEM;
+	}
+
+	ret = irq_alloc_domain_generic_chips(domain, OCELOT_NR_IRQ, 1,
+					     "icpu", handle_level_irq,
+					     0, 0, 0);
+	if (ret) {
+		pr_err("%s: unable to alloc irq domain gc\n", node->name);
+		goto err_domain_remove;
+	}
+
+	gc = irq_get_domain_generic_chip(domain, 0);
+	gc->reg_base = of_iomap(node, 0);
+	if (!gc->reg_base) {
+		pr_err("%s: unable to map resource\n", node->name);
+		ret = -ENOMEM;
+		goto err_gc_free;
+	}
+
+	gc->chip_types[0].regs.ack = ICPU_CFG_INTR_INTR_STICKY;
+	gc->chip_types[0].regs.mask = ICPU_CFG_INTR_INTR_ENA_CLR;
+	gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit;
+	gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit;
+	gc->chip_types[0].chip.irq_unmask = ocelot_irq_unmask;
+
+	/* Mask and ack all interrupts */
+	irq_reg_writel(gc, 0, ICPU_CFG_INTR_INTR_ENA);
+	irq_reg_writel(gc, 0xffffffff, ICPU_CFG_INTR_INTR_STICKY);
+
+	irq_set_chained_handler_and_data(parent_irq, ocelot_irq_handler,
+					 domain);
+
+	return 0;
+
+err_gc_free:
+	irq_free_generic_chip(gc);
+
+err_domain_remove:
+	irq_domain_remove(domain);
+
+	return ret;
+}
+IRQCHIP_DECLARE(ocelot_icpu, "mscc,ocelot-icpu-intr", ocelot_irq_init);
diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c
index cee59fe1321c..c6e6c9e9137a 100644
--- a/drivers/irqchip/irq-renesas-intc-irqpin.c
+++ b/drivers/irqchip/irq-renesas-intc-irqpin.c
@@ -17,7 +17,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/clk.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
@@ -78,16 +77,14 @@ struct intc_irqpin_priv {
 	struct platform_device *pdev;
 	struct irq_chip irq_chip;
 	struct irq_domain *irq_domain;
-	struct clk *clk;
+	atomic_t wakeup_path;
 	unsigned shared_irqs:1;
-	unsigned needs_clk:1;
 	u8 shared_irq_mask;
 };
 
 struct intc_irqpin_config {
 	unsigned int irlm_bit;
 	unsigned needs_irlm:1;
-	unsigned needs_clk:1;
 };
 
 static unsigned long intc_irqpin_read32(void __iomem *iomem)
@@ -287,14 +284,10 @@ static int intc_irqpin_irq_set_wake(struct irq_data *d, unsigned int on)
 	int hw_irq = irqd_to_hwirq(d);
 
 	irq_set_irq_wake(p->irq[hw_irq].requested_irq, on);
-
-	if (!p->clk)
-		return 0;
-
 	if (on)
-		clk_enable(p->clk);
+		atomic_inc(&p->wakeup_path);
 	else
-		clk_disable(p->clk);
+		atomic_dec(&p->wakeup_path);
 
 	return 0;
 }
@@ -369,12 +362,10 @@ static const struct irq_domain_ops intc_irqpin_irq_domain_ops = {
 static const struct intc_irqpin_config intc_irqpin_irlm_r8a777x = {
 	.irlm_bit = 23, /* ICR0.IRLM0 */
 	.needs_irlm = 1,
-	.needs_clk = 0,
 };
 
 static const struct intc_irqpin_config intc_irqpin_rmobile = {
 	.needs_irlm = 0,
-	.needs_clk = 1,
 };
 
 static const struct of_device_id intc_irqpin_dt_ids[] = {
@@ -426,18 +417,6 @@ static int intc_irqpin_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, p);
 
 	config = of_device_get_match_data(dev);
-	if (config)
-		p->needs_clk = config->needs_clk;
-
-	p->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(p->clk)) {
-		if (p->needs_clk) {
-			dev_err(dev, "unable to get clock\n");
-			ret = PTR_ERR(p->clk);
-			goto err0;
-		}
-		p->clk = NULL;
-	}
 
 	pm_runtime_enable(dev);
 	pm_runtime_get_sync(dev);
@@ -606,12 +585,25 @@ static int intc_irqpin_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused intc_irqpin_suspend(struct device *dev)
+{
+	struct intc_irqpin_priv *p = dev_get_drvdata(dev);
+
+	if (atomic_read(&p->wakeup_path))
+		device_set_wakeup_path(dev);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(intc_irqpin_pm_ops, intc_irqpin_suspend, NULL);
+
 static struct platform_driver intc_irqpin_device_driver = {
 	.probe		= intc_irqpin_probe,
 	.remove		= intc_irqpin_remove,
 	.driver		= {
 		.name	= "renesas_intc_irqpin",
 		.of_match_table = intc_irqpin_dt_ids,
+		.pm	= &intc_irqpin_pm_ops,
 	}
 };
 
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 52304b139aa4..a4f11124024d 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -17,7 +17,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#include <linux/clk.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
@@ -64,7 +63,7 @@ struct irqc_priv {
 	struct platform_device *pdev;
 	struct irq_chip_generic *gc;
 	struct irq_domain *irq_domain;
-	struct clk *clk;
+	atomic_t wakeup_path;
 };
 
 static struct irqc_priv *irq_data_to_priv(struct irq_data *data)
@@ -111,14 +110,10 @@ static int irqc_irq_set_wake(struct irq_data *d, unsigned int on)
 	int hw_irq = irqd_to_hwirq(d);
 
 	irq_set_irq_wake(p->irq[hw_irq].requested_irq, on);
-
-	if (!p->clk)
-		return 0;
-
 	if (on)
-		clk_enable(p->clk);
+		atomic_inc(&p->wakeup_path);
 	else
-		clk_disable(p->clk);
+		atomic_dec(&p->wakeup_path);
 
 	return 0;
 }
@@ -159,12 +154,6 @@ static int irqc_probe(struct platform_device *pdev)
 	p->pdev = pdev;
 	platform_set_drvdata(pdev, p);
 
-	p->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(p->clk)) {
-		dev_warn(&pdev->dev, "unable to get clock\n");
-		p->clk = NULL;
-	}
-
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -276,6 +265,18 @@ static int irqc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused irqc_suspend(struct device *dev)
+{
+	struct irqc_priv *p = dev_get_drvdata(dev);
+
+	if (atomic_read(&p->wakeup_path))
+		device_set_wakeup_path(dev);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(irqc_pm_ops, irqc_suspend, NULL);
+
 static const struct of_device_id irqc_dt_ids[] = {
 	{ .compatible = "renesas,irqc", },
 	{},
@@ -288,6 +289,7 @@ static struct platform_driver irqc_device_driver = {
 	.driver		= {
 		.name	= "renesas_irqc",
 		.of_match_table	= irqc_dt_ids,
+		.pm	= &irqc_pm_ops,
 	}
 };
 
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
new file mode 100644
index 000000000000..b1b47a40a278
--- /dev/null
+++ b/drivers/irqchip/qcom-pdc.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define PDC_MAX_IRQS		126
+
+#define CLEAR_INTR(reg, intr)	(reg & ~(1 << intr))
+#define ENABLE_INTR(reg, intr)	(reg | (1 << intr))
+
+#define IRQ_ENABLE_BANK		0x10
+#define IRQ_i_CFG		0x110
+
+struct pdc_pin_region {
+	u32 pin_base;
+	u32 parent_base;
+	u32 cnt;
+};
+
+static DEFINE_RAW_SPINLOCK(pdc_lock);
+static void __iomem *pdc_base;
+static struct pdc_pin_region *pdc_region;
+static int pdc_region_cnt;
+
+static void pdc_reg_write(int reg, u32 i, u32 val)
+{
+	writel_relaxed(val, pdc_base + reg + i * sizeof(u32));
+}
+
+static u32 pdc_reg_read(int reg, u32 i)
+{
+	return readl_relaxed(pdc_base + reg + i * sizeof(u32));
+}
+
+static void pdc_enable_intr(struct irq_data *d, bool on)
+{
+	int pin_out = d->hwirq;
+	u32 index, mask;
+	u32 enable;
+
+	index = pin_out / 32;
+	mask = pin_out % 32;
+
+	raw_spin_lock(&pdc_lock);
+	enable = pdc_reg_read(IRQ_ENABLE_BANK, index);
+	enable = on ? ENABLE_INTR(enable, mask) : CLEAR_INTR(enable, mask);
+	pdc_reg_write(IRQ_ENABLE_BANK, index, enable);
+	raw_spin_unlock(&pdc_lock);
+}
+
+static void qcom_pdc_gic_mask(struct irq_data *d)
+{
+	pdc_enable_intr(d, false);
+	irq_chip_mask_parent(d);
+}
+
+static void qcom_pdc_gic_unmask(struct irq_data *d)
+{
+	pdc_enable_intr(d, true);
+	irq_chip_unmask_parent(d);
+}
+
+/*
+ * GIC does not handle falling edge or active low. To allow falling edge and
+ * active low interrupts to be handled at GIC, PDC has an inverter that inverts
+ * falling edge into a rising edge and active low into an active high.
+ * For the inverter to work, the polarity bit in the IRQ_CONFIG register has to
+ * set as per the table below.
+ * Level sensitive active low    LOW
+ * Rising edge sensitive         NOT USED
+ * Falling edge sensitive        LOW
+ * Dual Edge sensitive           NOT USED
+ * Level sensitive active High   HIGH
+ * Falling Edge sensitive        NOT USED
+ * Rising edge sensitive         HIGH
+ * Dual Edge sensitive           HIGH
+ */
+enum pdc_irq_config_bits {
+	PDC_LEVEL_LOW		= 0b000,
+	PDC_EDGE_FALLING	= 0b010,
+	PDC_LEVEL_HIGH		= 0b100,
+	PDC_EDGE_RISING		= 0b110,
+	PDC_EDGE_DUAL		= 0b111,
+};
+
+/**
+ * qcom_pdc_gic_set_type: Configure PDC for the interrupt
+ *
+ * @d: the interrupt data
+ * @type: the interrupt type
+ *
+ * If @type is edge triggered, forward that as Rising edge as PDC
+ * takes care of converting falling edge to rising edge signal
+ * If @type is level, then forward that as level high as PDC
+ * takes care of converting falling edge to rising edge signal
+ */
+static int qcom_pdc_gic_set_type(struct irq_data *d, unsigned int type)
+{
+	int pin_out = d->hwirq;
+	enum pdc_irq_config_bits pdc_type;
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		pdc_type = PDC_EDGE_RISING;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		pdc_type = PDC_EDGE_FALLING;
+		type = IRQ_TYPE_EDGE_RISING;
+		break;
+	case IRQ_TYPE_EDGE_BOTH:
+		pdc_type = PDC_EDGE_DUAL;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		pdc_type = PDC_LEVEL_HIGH;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		pdc_type = PDC_LEVEL_LOW;
+		type = IRQ_TYPE_LEVEL_HIGH;
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	pdc_reg_write(IRQ_i_CFG, pin_out, pdc_type);
+
+	return irq_chip_set_type_parent(d, type);
+}
+
+static struct irq_chip qcom_pdc_gic_chip = {
+	.name			= "PDC",
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_mask		= qcom_pdc_gic_mask,
+	.irq_unmask		= qcom_pdc_gic_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= qcom_pdc_gic_set_type,
+	.flags			= IRQCHIP_MASK_ON_SUSPEND |
+				  IRQCHIP_SET_TYPE_MASKED |
+				  IRQCHIP_SKIP_SET_WAKE,
+	.irq_set_vcpu_affinity	= irq_chip_set_vcpu_affinity_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static irq_hw_number_t get_parent_hwirq(int pin)
+{
+	int i;
+	struct pdc_pin_region *region;
+
+	for (i = 0; i < pdc_region_cnt; i++) {
+		region = &pdc_region[i];
+		if (pin >= region->pin_base &&
+		    pin < region->pin_base + region->cnt)
+			return (region->parent_base + pin - region->pin_base);
+	}
+
+	WARN_ON(1);
+	return ~0UL;
+}
+
+static int qcom_pdc_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+			      unsigned long *hwirq, unsigned int *type)
+{
+	if (is_of_node(fwspec->fwnode)) {
+		if (fwspec->param_count != 2)
+			return -EINVAL;
+
+		*hwirq = fwspec->param[0];
+		*type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int qcom_pdc_alloc(struct irq_domain *domain, unsigned int virq,
+			  unsigned int nr_irqs, void *data)
+{
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
+	irq_hw_number_t hwirq, parent_hwirq;
+	unsigned int type;
+	int ret;
+
+	ret = qcom_pdc_translate(domain, fwspec, &hwirq, &type);
+	if (ret)
+		return -EINVAL;
+
+	parent_hwirq = get_parent_hwirq(hwirq);
+	if (parent_hwirq == ~0UL)
+		return -EINVAL;
+
+	ret  = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					     &qcom_pdc_gic_chip, NULL);
+	if (ret)
+		return ret;
+
+	if (type & IRQ_TYPE_EDGE_BOTH)
+		type = IRQ_TYPE_EDGE_RISING;
+
+	if (type & IRQ_TYPE_LEVEL_MASK)
+		type = IRQ_TYPE_LEVEL_HIGH;
+
+	parent_fwspec.fwnode      = domain->parent->fwnode;
+	parent_fwspec.param_count = 3;
+	parent_fwspec.param[0]    = 0;
+	parent_fwspec.param[1]    = parent_hwirq;
+	parent_fwspec.param[2]    = type;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs,
+					    &parent_fwspec);
+}
+
+static const struct irq_domain_ops qcom_pdc_ops = {
+	.translate	= qcom_pdc_translate,
+	.alloc		= qcom_pdc_alloc,
+	.free		= irq_domain_free_irqs_common,
+};
+
+static int pdc_setup_pin_mapping(struct device_node *np)
+{
+	int ret, n;
+
+	n = of_property_count_elems_of_size(np, "qcom,pdc-ranges", sizeof(u32));
+	if (n <= 0 || n % 3)
+		return -EINVAL;
+
+	pdc_region_cnt = n / 3;
+	pdc_region = kcalloc(pdc_region_cnt, sizeof(*pdc_region), GFP_KERNEL);
+	if (!pdc_region) {
+		pdc_region_cnt = 0;
+		return -ENOMEM;
+	}
+
+	for (n = 0; n < pdc_region_cnt; n++) {
+		ret = of_property_read_u32_index(np, "qcom,pdc-ranges",
+						 n * 3 + 0,
+						 &pdc_region[n].pin_base);
+		if (ret)
+			return ret;
+		ret = of_property_read_u32_index(np, "qcom,pdc-ranges",
+						 n * 3 + 1,
+						 &pdc_region[n].parent_base);
+		if (ret)
+			return ret;
+		ret = of_property_read_u32_index(np, "qcom,pdc-ranges",
+						 n * 3 + 2,
+						 &pdc_region[n].cnt);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_pdc_init(struct device_node *node, struct device_node *parent)
+{
+	struct irq_domain *parent_domain, *pdc_domain;
+	int ret;
+
+	pdc_base = of_iomap(node, 0);
+	if (!pdc_base) {
+		pr_err("%pOF: unable to map PDC registers\n", node);
+		return -ENXIO;
+	}
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("%pOF: unable to find PDC's parent domain\n", node);
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	ret = pdc_setup_pin_mapping(node);
+	if (ret) {
+		pr_err("%pOF: failed to init PDC pin-hwirq mapping\n", node);
+		goto fail;
+	}
+
+	pdc_domain = irq_domain_create_hierarchy(parent_domain, 0, PDC_MAX_IRQS,
+						 of_fwnode_handle(node),
+						 &qcom_pdc_ops, NULL);
+	if (!pdc_domain) {
+		pr_err("%pOF: GIC domain add failed\n", node);
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	kfree(pdc_region);
+	iounmap(pdc_base);
+	return ret;
+}
+
+IRQCHIP_DECLARE(pdc_sdm845, "qcom,sdm845-pdc", qcom_pdc_init);
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index d77ebbfc67c9..4ea08979312c 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,6 +138,9 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
 	struct dentry *dentry;
 	struct inode *inode;
 
+	if (!root)
+		return -ENOMEM;
+
 	inode_lock(d_inode(root));
 	dentry = d_alloc_name(root, name);
 	if (!dentry) {
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 7729eda5909d..971ff336494a 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -122,7 +122,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 			return pmu_parse_percpu_irq(pmu, irq);
 	}
 
-	if (!pmu_has_irq_affinity(pdev->dev.of_node)) {
+	if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(pdev->dev.of_node)) {
 		pr_warn("no interrupt-affinity property for %pOF, guessing.\n",
 			pdev->dev.of_node);
 	}
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 51b40aecb776..28bb642af18b 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -23,16 +23,30 @@
 #define DRVNAME					PMUNAME "_pmu"
 #define pr_fmt(fmt)				DRVNAME ": " fmt
 
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/capability.h>
 #include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
+#include <linux/printk.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/vmalloc.h>
 
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/mmu.h>
 #include <asm/sysreg.h>
 
 #define ARM_SPE_BUF_PAD_BYTE			0
diff --git a/drivers/staging/ccree/Kconfig b/drivers/staging/ccree/Kconfig
index c94dfe8adb63..168191fa0357 100644
--- a/drivers/staging/ccree/Kconfig
+++ b/drivers/staging/ccree/Kconfig
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
-config CRYPTO_DEV_CCREE
+config CRYPTO_DEV_CCREE_OLD
 	tristate "Support for ARM TrustZone CryptoCell C7XX family of Crypto accelerators"
-	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA && BROKEN
 	default n
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
diff --git a/drivers/staging/ccree/Makefile b/drivers/staging/ccree/Makefile
index bdc27970f95f..553db5c45354 100644
--- a/drivers/staging/ccree/Makefile
+++ b/drivers/staging/ccree/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_CRYPTO_DEV_CCREE) := ccree.o
+obj-$(CONFIG_CRYPTO_DEV_CCREE_OLD) := ccree.o
 ccree-y := cc_driver.o cc_buffer_mgr.o cc_request_mgr.o cc_cipher.o cc_hash.o cc_aead.o cc_ivgen.o cc_sram_mgr.o
 ccree-$(CONFIG_CRYPTO_FIPS) += cc_fips.o
 ccree-$(CONFIG_DEBUG_FS) += cc_debugfs.o
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index 549369739d80..6cd0318062e8 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -90,7 +90,7 @@ static int ll_dcompare(const struct dentry *dentry,
 	       d_count(dentry));
 
 	/* mountpoint is always valid */
-	if (d_mountpoint((struct dentry *)dentry))
+	if (d_mountpoint(dentry))
 		return 0;
 
 	if (d_lustre_invalid(dentry))
@@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentry *de)
 	LASSERT(de);
 
 	CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
-	       d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
+	       d_lustre_invalid(de) ? "deleting" : "keeping",
 	       de, de, de->d_parent, d_inode(de),
 	       d_unhashed(de) ? "" : "hashed,",
 	       list_empty(&de->d_subdirs) ? "" : "subdirs");
@@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentry *de)
 	/* kernel >= 2.6.38 last refcount is decreased after this function. */
 	LASSERT(d_count(de) == 1);
 
-	if (d_lustre_invalid((struct dentry *)de))
+	if (d_lustre_invalid(de))
 		return 1;
 	return 0;
 }
diff --git a/fs/Makefile b/fs/Makefile
index add789ea270a..c9375fd2c8c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		ioctl.o readdir.o select.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
-		pnode.o splice.o sync.o utimes.o \
+		pnode.o splice.o sync.o utimes.o d_path.o \
 		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
 
 ifeq ($(CONFIG_BLOCK),y)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 273351ee4c46..167e5dc7eadd 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,7 +1,6 @@
 config BTRFS_FS
 	tristate "Btrfs filesystem support"
-	select CRYPTO
-	select CRYPTO_CRC32C
+	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	select LZO_COMPRESS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 0c4373628eb4..ca693dd554e9 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,7 +10,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
+	   uuid-tree.o props.o free-space-tree.o tree-checker.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 1ba49ebe67da..0066d95b133f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -46,12 +46,12 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		BUG();
 	}
 
-	size = __btrfs_getxattr(inode, name, "", 0);
+	size = btrfs_getxattr(inode, name, "", 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_KERNEL);
 		if (!value)
 			return ERR_PTR(-ENOMEM);
-		size = __btrfs_getxattr(inode, name, value, size);
+		size = btrfs_getxattr(inode, name, value, size);
 	}
 	if (size > 0) {
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
@@ -65,9 +65,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-/*
- * Needs to be called with fs_mutex held
- */
 static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 			 struct inode *inode, struct posix_acl *acl, int type)
 {
@@ -101,7 +98,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 			goto out;
 	}
 
-	ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
+	ret = btrfs_setxattr(trans, inode, name, value, size, 0);
 out:
 	kfree(value);
 
@@ -127,11 +124,6 @@ int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	return ret;
 }
 
-/*
- * btrfs_init_acl is already generally called under fs_mutex, so the locking
- * stuff has been fixed to work with that.  If the locking stuff changes, we
- * need to re-evaluate the acl locking stuff.
- */
 int btrfs_init_acl(struct btrfs_trans_handle *trans,
 		   struct inode *inode, struct inode *dir)
 {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 26484648d090..571024bc632e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -170,7 +170,7 @@ int __init btrfs_prelim_ref_init(void)
 	return 0;
 }
 
-void btrfs_prelim_ref_exit(void)
+void __cold btrfs_prelim_ref_exit(void)
 {
 	kmem_cache_destroy(btrfs_prelim_ref_cache);
 }
@@ -738,7 +738,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
 		BUG_ON(ref->key_for_search.type);
 		BUG_ON(!ref->wanted_disk_byte);
 
-		eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0);
+		eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
+				     ref->level - 1, NULL);
 		if (IS_ERR(eb)) {
 			free_pref(ref);
 			return PTR_ERR(eb);
@@ -773,15 +774,12 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct btrfs_key key;
 	struct btrfs_key tmp_op_key;
-	struct btrfs_key *op_key = NULL;
 	struct rb_node *n;
 	int count;
 	int ret = 0;
 
-	if (extent_op && extent_op->update_key) {
+	if (extent_op && extent_op->update_key)
 		btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-		op_key = &tmp_op_key;
-	}
 
 	spin_lock(&head->lock);
 	for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
@@ -1291,7 +1289,8 @@ again:
 			    ref->level == 0) {
 				struct extent_buffer *eb;
 
-				eb = read_tree_block(fs_info, ref->parent, 0);
+				eb = read_tree_block(fs_info, ref->parent, 0,
+						     ref->level, NULL);
 				if (IS_ERR(eb)) {
 					ret = PTR_ERR(eb);
 					goto out;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 0c2fab8514ff..0a30028d5196 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -73,7 +73,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr);
 
 int __init btrfs_prelim_ref_init(void);
-void btrfs_prelim_ref_exit(void);
+void __cold btrfs_prelim_ref_exit(void);
 
 struct prelim_ref {
 	struct rb_node rbnode;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 63f0ccc92a71..ca15be569d69 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -195,7 +195,6 @@ struct btrfs_inode {
 
 	/* Hook into fs_info->delayed_iputs */
 	struct list_head delayed_iput;
-	long delayed_iput_count;
 
 	/*
 	 * To avoid races between lockless (i_mutex not held) direct IO writes
@@ -365,6 +364,4 @@ static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
 			logical_start, csum, csum_expected, mirror_num);
 }
 
-bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
-
 #endif
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7d51b5a5b505..3baebbc021c5 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -96,9 +96,9 @@
 #include <linux/blkdev.h>
 #include <linux/mm.h>
 #include <linux/string.h>
+#include <linux/crc32c.h>
 #include "ctree.h"
 #include "disk-io.h"
-#include "hash.h"
 #include "transaction.h"
 #include "extent_io.h"
 #include "volumes.h"
@@ -1736,7 +1736,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
 		size_t sublen = i ? PAGE_SIZE :
 				    (PAGE_SIZE - BTRFS_CSUM_SIZE);
 
-		crc = btrfs_crc32c(crc, data, sublen);
+		crc = crc32c(crc, data, sublen);
 	}
 	btrfs_csum_final(crc, csum);
 	if (memcmp(csum, h->csum, state->csum_size))
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 07d049c0c20f..562c3e633403 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1133,7 +1133,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
 	return ret;
 }
 
-void btrfs_exit_compress(void)
+void __cold btrfs_exit_compress(void)
 {
 	free_workspaces();
 }
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 677fa4aa0bd7..ce796557a918 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -76,7 +76,7 @@ struct compressed_bio {
 };
 
 void __init btrfs_init_compress(void);
-void btrfs_exit_compress(void);
+void __cold btrfs_exit_compress(void);
 
 int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b88a79e69ddf..a2c9d21176e2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -41,8 +41,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		    int level, int slot);
-static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
-				 struct extent_buffer *eb);
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -301,11 +299,6 @@ enum mod_log_op {
 	MOD_LOG_ROOT_REPLACE,
 };
 
-struct tree_mod_move {
-	int dst_slot;
-	int nr_items;
-};
-
 struct tree_mod_root {
 	u64 logical;
 	u8 level;
@@ -328,32 +321,15 @@ struct tree_mod_elem {
 	u64 blockptr;
 
 	/* this is used for op == MOD_LOG_MOVE_KEYS */
-	struct tree_mod_move move;
+	struct {
+		int dst_slot;
+		int nr_items;
+	} move;
 
 	/* this is used for op == MOD_LOG_ROOT_REPLACE */
 	struct tree_mod_root old_root;
 };
 
-static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info)
-{
-	read_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info)
-{
-	read_unlock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info)
-{
-	write_lock(&fs_info->tree_mod_log_lock);
-}
-
-static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
-{
-	write_unlock(&fs_info->tree_mod_log_lock);
-}
-
 /*
  * Pull a new tree mod seq number for our operation.
  */
@@ -373,14 +349,14 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
 u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 			   struct seq_list *elem)
 {
-	tree_mod_log_write_lock(fs_info);
+	write_lock(&fs_info->tree_mod_log_lock);
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	if (!elem->seq) {
 		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
 		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
 	}
 	spin_unlock(&fs_info->tree_mod_seq_lock);
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&fs_info->tree_mod_log_lock);
 
 	return elem->seq;
 }
@@ -422,7 +398,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
-	tree_mod_log_write_lock(fs_info);
+	write_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	for (node = rb_first(tm_root); node; node = next) {
 		next = rb_next(node);
@@ -432,7 +408,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 		rb_erase(node, tm_root);
 		kfree(tm);
 	}
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&fs_info->tree_mod_log_lock);
 }
 
 /*
@@ -443,7 +419,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
  * for root replace operations, or the logical address of the affected
  * block for all other operations.
  *
- * Note: must be called with write lock (tree_mod_log_write_lock).
+ * Note: must be called with write lock for fs_info::tree_mod_log_lock.
  */
 static noinline int
 __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -481,7 +457,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
  * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
  * returns zero with the tree_mod_log_lock acquired. The caller must hold
  * this until all tree mod log insertions are recorded in the rb tree and then
- * call tree_mod_log_write_unlock() to release.
+ * write unlock fs_info::tree_mod_log_lock.
  */
 static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 				    struct extent_buffer *eb) {
@@ -491,9 +467,9 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 	if (eb && btrfs_header_level(eb) == 0)
 		return 1;
 
-	tree_mod_log_write_lock(fs_info);
+	write_lock(&fs_info->tree_mod_log_lock);
 	if (list_empty(&(fs_info)->tree_mod_seq_list)) {
-		tree_mod_log_write_unlock(fs_info);
+		write_unlock(&fs_info->tree_mod_log_lock);
 		return 1;
 	}
 
@@ -536,38 +512,34 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
 	return tm;
 }
 
-static noinline int
-tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
-			struct extent_buffer *eb, int slot,
-			enum mod_log_op op, gfp_t flags)
+static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
+		enum mod_log_op op, gfp_t flags)
 {
 	struct tree_mod_elem *tm;
 	int ret;
 
-	if (!tree_mod_need_log(fs_info, eb))
+	if (!tree_mod_need_log(eb->fs_info, eb))
 		return 0;
 
 	tm = alloc_tree_mod_elem(eb, slot, op, flags);
 	if (!tm)
 		return -ENOMEM;
 
-	if (tree_mod_dont_log(fs_info, eb)) {
+	if (tree_mod_dont_log(eb->fs_info, eb)) {
 		kfree(tm);
 		return 0;
 	}
 
-	ret = __tree_mod_log_insert(fs_info, tm);
-	tree_mod_log_write_unlock(fs_info);
+	ret = __tree_mod_log_insert(eb->fs_info, tm);
+	write_unlock(&eb->fs_info->tree_mod_log_lock);
 	if (ret)
 		kfree(tm);
 
 	return ret;
 }
 
-static noinline int
-tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *eb, int dst_slot, int src_slot,
-			 int nr_items)
+static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
+		int dst_slot, int src_slot, int nr_items)
 {
 	struct tree_mod_elem *tm = NULL;
 	struct tree_mod_elem **tm_list = NULL;
@@ -575,7 +547,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	int i;
 	int locked = 0;
 
-	if (!tree_mod_need_log(fs_info, eb))
+	if (!tree_mod_need_log(eb->fs_info, eb))
 		return 0;
 
 	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
@@ -603,7 +575,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	if (tree_mod_dont_log(fs_info, eb))
+	if (tree_mod_dont_log(eb->fs_info, eb))
 		goto free_tms;
 	locked = 1;
 
@@ -613,26 +585,26 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 	 * buffer, i.e. dst_slot < src_slot.
 	 */
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+		ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
 		if (ret)
 			goto free_tms;
 	}
 
-	ret = __tree_mod_log_insert(fs_info, tm);
+	ret = __tree_mod_log_insert(eb->fs_info, tm);
 	if (ret)
 		goto free_tms;
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&eb->fs_info->tree_mod_log_lock);
 	kfree(tm_list);
 
 	return 0;
 free_tms:
 	for (i = 0; i < nr_items; i++) {
 		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
-			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+			rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
 		kfree(tm_list[i]);
 	}
 	if (locked)
-		tree_mod_log_write_unlock(fs_info);
+		write_unlock(&eb->fs_info->tree_mod_log_lock);
 	kfree(tm_list);
 	kfree(tm);
 
@@ -660,12 +632,10 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-static noinline int
-tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
-			 struct extent_buffer *old_root,
-			 struct extent_buffer *new_root,
-			 int log_removal)
+static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
+			 struct extent_buffer *new_root, int log_removal)
 {
+	struct btrfs_fs_info *fs_info = old_root->fs_info;
 	struct tree_mod_elem *tm = NULL;
 	struct tree_mod_elem **tm_list = NULL;
 	int nritems = 0;
@@ -713,7 +683,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	if (!ret)
 		ret = __tree_mod_log_insert(fs_info, tm);
 
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&fs_info->tree_mod_log_lock);
 	if (ret)
 		goto free_tms;
 	kfree(tm_list);
@@ -740,7 +710,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 	struct tree_mod_elem *cur = NULL;
 	struct tree_mod_elem *found = NULL;
 
-	tree_mod_log_read_lock(fs_info);
+	read_lock(&fs_info->tree_mod_log_lock);
 	tm_root = &fs_info->tree_mod_log;
 	node = tm_root->rb_node;
 	while (node) {
@@ -768,7 +738,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
 			break;
 		}
 	}
-	tree_mod_log_read_unlock(fs_info);
+	read_unlock(&fs_info->tree_mod_log_lock);
 
 	return found;
 }
@@ -849,7 +819,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 			goto free_tms;
 	}
 
-	tree_mod_log_write_unlock(fs_info);
+	write_unlock(&fs_info->tree_mod_log_lock);
 	kfree(tm_list);
 
 	return 0;
@@ -861,36 +831,13 @@ free_tms:
 		kfree(tm_list[i]);
 	}
 	if (locked)
-		tree_mod_log_write_unlock(fs_info);
+		write_unlock(&fs_info->tree_mod_log_lock);
 	kfree(tm_list);
 
 	return ret;
 }
 
-static inline void
-tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
-		     int dst_offset, int src_offset, int nr_items)
-{
-	int ret;
-	ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
-				       nr_items);
-	BUG_ON(ret < 0);
-}
-
-static noinline void
-tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
-			  struct extent_buffer *eb, int slot, int atomic)
-{
-	int ret;
-
-	ret = tree_mod_log_insert_key(fs_info, eb, slot,
-					MOD_LOG_KEY_REPLACE,
-					atomic ? GFP_ATOMIC : GFP_NOFS);
-	BUG_ON(ret < 0);
-}
-
-static noinline int
-tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
 {
 	struct tree_mod_elem **tm_list = NULL;
 	int nritems = 0;
@@ -900,7 +847,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 	if (btrfs_header_level(eb) == 0)
 		return 0;
 
-	if (!tree_mod_need_log(fs_info, NULL))
+	if (!tree_mod_need_log(eb->fs_info, NULL))
 		return 0;
 
 	nritems = btrfs_header_nritems(eb);
@@ -917,11 +864,11 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 		}
 	}
 
-	if (tree_mod_dont_log(fs_info, eb))
+	if (tree_mod_dont_log(eb->fs_info, eb))
 		goto free_tms;
 
-	ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
-	tree_mod_log_write_unlock(fs_info);
+	ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
+	write_unlock(&eb->fs_info->tree_mod_log_lock);
 	if (ret)
 		goto free_tms;
 	kfree(tm_list);
@@ -936,17 +883,6 @@ free_tms:
 	return ret;
 }
 
-static noinline void
-tree_mod_log_set_root_pointer(struct btrfs_root *root,
-			      struct extent_buffer *new_root_node,
-			      int log_removal)
-{
-	int ret;
-	ret = tree_mod_log_insert_root(root->fs_info, root->node,
-				       new_root_node, log_removal);
-	BUG_ON(ret < 0);
-}
-
 /*
  * check if the tree block can be shared by multiple trees
  */
@@ -1173,7 +1109,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 			parent_start = buf->start;
 
 		extent_buffer_get(cow);
-		tree_mod_log_set_root_pointer(root, cow, 1);
+		ret = tree_mod_log_insert_root(root->node, cow, 1);
+		BUG_ON(ret < 0);
 		rcu_assign_pointer(root->node, cow);
 
 		btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1182,7 +1119,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		add_root_to_dirty_list(root);
 	} else {
 		WARN_ON(trans->transid != btrfs_header_generation(parent));
-		tree_mod_log_insert_key(fs_info, parent, parent_slot,
+		tree_mod_log_insert_key(parent, parent_slot,
 					MOD_LOG_KEY_REPLACE, GFP_NOFS);
 		btrfs_set_node_blockptr(parent, parent_slot,
 					cow->start);
@@ -1190,7 +1127,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
 		if (last_ref) {
-			ret = tree_mod_log_free_eb(fs_info, buf);
+			ret = tree_mod_log_free_eb(buf);
 			if (ret) {
 				btrfs_abort_transaction(trans, ret);
 				return ret;
@@ -1211,9 +1148,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
  * returns the logical address of the oldest predecessor of the given root.
  * entries older than time_seq are ignored.
  */
-static struct tree_mod_elem *
-__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
-			   struct extent_buffer *eb_root, u64 time_seq)
+static struct tree_mod_elem *__tree_mod_log_oldest_root(
+		struct extent_buffer *eb_root, u64 time_seq)
 {
 	struct tree_mod_elem *tm;
 	struct tree_mod_elem *found = NULL;
@@ -1230,7 +1166,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 	 * first operation that's logged for this root.
 	 */
 	while (1) {
-		tm = tree_mod_log_search_oldest(fs_info, root_logical,
+		tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
 						time_seq);
 		if (!looped && !tm)
 			return NULL;
@@ -1279,7 +1215,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 	unsigned long p_size = sizeof(struct btrfs_key_ptr);
 
 	n = btrfs_header_nritems(eb);
-	tree_mod_log_read_lock(fs_info);
+	read_lock(&fs_info->tree_mod_log_lock);
 	while (tm && tm->seq >= time_seq) {
 		/*
 		 * all the operations are recorded with the operator used for
@@ -1334,7 +1270,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 		if (tm->logical != first_tm->logical)
 			break;
 	}
-	tree_mod_log_read_unlock(fs_info);
+	read_unlock(&fs_info->tree_mod_log_lock);
 	btrfs_set_header_nritems(eb, n);
 }
 
@@ -1418,9 +1354,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	struct tree_mod_root *old_root = NULL;
 	u64 old_generation = 0;
 	u64 logical;
+	int level;
 
 	eb_root = btrfs_read_lock_root_node(root);
-	tm = __tree_mod_log_oldest_root(fs_info, eb_root, time_seq);
+	tm = __tree_mod_log_oldest_root(eb_root, time_seq);
 	if (!tm)
 		return eb_root;
 
@@ -1428,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 		old_root = &tm->old_root;
 		old_generation = tm->generation;
 		logical = old_root->logical;
+		level = old_root->level;
 	} else {
 		logical = eb_root->start;
+		level = btrfs_header_level(eb_root);
 	}
 
 	tm = tree_mod_log_search(fs_info, logical, time_seq);
 	if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 		btrfs_tree_read_unlock(eb_root);
 		free_extent_buffer(eb_root);
-		old = read_tree_block(fs_info, logical, 0);
+		old = read_tree_block(fs_info, logical, 0, level, NULL);
 		if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
 			if (!IS_ERR(old))
 				free_extent_buffer(old);
@@ -1484,7 +1423,7 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
 	int level;
 	struct extent_buffer *eb_root = btrfs_root_node(root);
 
-	tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
+	tm = __tree_mod_log_oldest_root(eb_root, time_seq);
 	if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
 		level = tm->old_root.level;
 	} else {
@@ -1502,8 +1441,8 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 	if (btrfs_is_testing(root->fs_info))
 		return 0;
 
-	/* ensure we can see the force_cow */
-	smp_rmb();
+	/* Ensure we can see the FORCE_COW bit */
+	smp_mb__before_atomic();
 
 	/*
 	 * We do not need to cow a block if
@@ -1656,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	btrfs_set_lock_blocking(parent);
 
 	for (i = start_slot; i <= end_slot; i++) {
+		struct btrfs_key first_key;
 		int close = 1;
 
 		btrfs_node_key(parent, &disk_key, i);
@@ -1665,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		progress_passed = 1;
 		blocknr = btrfs_node_blockptr(parent, i);
 		gen = btrfs_node_ptr_generation(parent, i);
+		btrfs_node_key_to_cpu(parent, &first_key, i);
 		if (last_block == 0)
 			last_block = blocknr;
 
@@ -1688,7 +1629,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			uptodate = 0;
 		if (!cur || !uptodate) {
 			if (!cur) {
-				cur = read_tree_block(fs_info, blocknr, gen);
+				cur = read_tree_block(fs_info, blocknr, gen,
+						      parent_level - 1,
+						      &first_key);
 				if (IS_ERR(cur)) {
 					return PTR_ERR(cur);
 				} else if (!extent_buffer_uptodate(cur)) {
@@ -1696,7 +1639,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 					return -EIO;
 				}
 			} else if (!uptodate) {
-				err = btrfs_read_buffer(cur, gen);
+				err = btrfs_read_buffer(cur, gen,
+						parent_level - 1,&first_key);
 				if (err) {
 					free_extent_buffer(cur);
 					return err;
@@ -1849,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent,
 {
 	int level = btrfs_header_level(parent);
 	struct extent_buffer *eb;
+	struct btrfs_key first_key;
 
 	if (slot < 0 || slot >= btrfs_header_nritems(parent))
 		return ERR_PTR(-ENOENT);
 
 	BUG_ON(level == 0);
 
+	btrfs_node_key_to_cpu(parent, &first_key, slot);
 	eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
-			     btrfs_node_ptr_generation(parent, slot));
+			     btrfs_node_ptr_generation(parent, slot),
+			     level - 1, &first_key);
 	if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
 		free_extent_buffer(eb);
 		eb = ERR_PTR(-EIO);
@@ -1928,7 +1875,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 			goto enospc;
 		}
 
-		tree_mod_log_set_root_pointer(root, child, 1);
+		ret = tree_mod_log_insert_root(root->node, child, 1);
+		BUG_ON(ret < 0);
 		rcu_assign_pointer(root->node, child);
 
 		add_root_to_dirty_list(root);
@@ -2007,8 +1955,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		} else {
 			struct btrfs_disk_key right_key;
 			btrfs_node_key(right, &right_key, 0);
-			tree_mod_log_set_node_key(fs_info, parent,
-						  pslot + 1, 0);
+			ret = tree_mod_log_insert_key(parent, pslot + 1,
+					MOD_LOG_KEY_REPLACE, GFP_NOFS);
+			BUG_ON(ret < 0);
 			btrfs_set_node_key(parent, &right_key, pslot + 1);
 			btrfs_mark_buffer_dirty(parent);
 		}
@@ -2052,7 +2001,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		/* update the parent key to reflect our changes */
 		struct btrfs_disk_key mid_key;
 		btrfs_node_key(mid, &mid_key, 0);
-		tree_mod_log_set_node_key(fs_info, parent, pslot, 0);
+		ret = tree_mod_log_insert_key(parent, pslot,
+				MOD_LOG_KEY_REPLACE, GFP_NOFS);
+		BUG_ON(ret < 0);
 		btrfs_set_node_key(parent, &mid_key, pslot);
 		btrfs_mark_buffer_dirty(parent);
 	}
@@ -2153,7 +2104,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 			struct btrfs_disk_key disk_key;
 			orig_slot += left_nr;
 			btrfs_node_key(mid, &disk_key, 0);
-			tree_mod_log_set_node_key(fs_info, parent, pslot, 0);
+			ret = tree_mod_log_insert_key(parent, pslot,
+					MOD_LOG_KEY_REPLACE, GFP_NOFS);
+			BUG_ON(ret < 0);
 			btrfs_set_node_key(parent, &disk_key, pslot);
 			btrfs_mark_buffer_dirty(parent);
 			if (btrfs_header_nritems(left) > orig_slot) {
@@ -2207,8 +2160,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 			struct btrfs_disk_key disk_key;
 
 			btrfs_node_key(right, &disk_key, 0);
-			tree_mod_log_set_node_key(fs_info, parent,
-						  pslot + 1, 0);
+			ret = tree_mod_log_insert_key(parent, pslot + 1,
+					MOD_LOG_KEY_REPLACE, GFP_NOFS);
+			BUG_ON(ret < 0);
 			btrfs_set_node_key(parent, &disk_key, pslot + 1);
 			btrfs_mark_buffer_dirty(parent);
 
@@ -2445,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	u64 gen;
 	struct extent_buffer *b = *eb_ret;
 	struct extent_buffer *tmp;
+	struct btrfs_key first_key;
 	int ret;
+	int parent_level;
 
 	blocknr = btrfs_node_blockptr(b, slot);
 	gen = btrfs_node_ptr_generation(b, slot);
+	parent_level = btrfs_header_level(b);
+	btrfs_node_key_to_cpu(b, &first_key, slot);
 
 	tmp = find_extent_buffer(fs_info, blocknr);
 	if (tmp) {
@@ -2467,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 		btrfs_set_path_blocking(p);
 
 		/* now we're allowed to do a blocking uptodate check */
-		ret = btrfs_read_buffer(tmp, gen);
+		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
 		if (!ret) {
 			*eb_ret = tmp;
 			return 0;
@@ -2494,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
 	btrfs_release_path(p);
 
 	ret = -EAGAIN;
-	tmp = read_tree_block(fs_info, blocknr, 0);
+	tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
+			      &first_key);
 	if (!IS_ERR(tmp)) {
 		/*
 		 * If the read above didn't mark this buffer up to date,
@@ -3161,13 +3120,17 @@ static void fixup_low_keys(struct btrfs_fs_info *fs_info,
 {
 	int i;
 	struct extent_buffer *t;
+	int ret;
 
 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
 		int tslot = path->slots[i];
+
 		if (!path->nodes[i])
 			break;
 		t = path->nodes[i];
-		tree_mod_log_set_node_key(fs_info, t, tslot, 1);
+		ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
+				GFP_ATOMIC);
+		BUG_ON(ret < 0);
 		btrfs_set_node_key(t, key, tslot);
 		btrfs_mark_buffer_dirty(path->nodes[i]);
 		if (tslot != 0)
@@ -3264,8 +3227,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
 
 	if (push_items < src_nritems) {
 		/*
-		 * don't call tree_mod_log_eb_move here, key removal was already
-		 * fully logged by tree_mod_log_eb_copy above.
+		 * Don't call tree_mod_log_insert_move here, key removal was
+		 * already fully logged by tree_mod_log_eb_copy above.
 		 */
 		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
 				      btrfs_node_key_ptr_offset(push_items),
@@ -3320,7 +3283,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 	if (max_push < push_items)
 		push_items = max_push;
 
-	tree_mod_log_eb_move(fs_info, dst, push_items, 0, dst_nritems);
+	ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
+	BUG_ON(ret < 0);
 	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
 				      btrfs_node_key_ptr_offset(0),
 				      (dst_nritems) *
@@ -3363,6 +3327,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	struct extent_buffer *c;
 	struct extent_buffer *old;
 	struct btrfs_disk_key lower_key;
+	int ret;
 
 	BUG_ON(path->nodes[level]);
 	BUG_ON(path->nodes[level-1] != root->node);
@@ -3401,7 +3366,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(c);
 
 	old = root->node;
-	tree_mod_log_set_root_pointer(root, c, 0);
+	ret = tree_mod_log_insert_root(root->node, c, 0);
+	BUG_ON(ret < 0);
 	rcu_assign_pointer(root->node, c);
 
 	/* the super has an extra ref to root->node */
@@ -3438,17 +3404,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	BUG_ON(slot > nritems);
 	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(fs_info));
 	if (slot != nritems) {
-		if (level)
-			tree_mod_log_eb_move(fs_info, lower, slot + 1,
-					     slot, nritems - slot);
+		if (level) {
+			ret = tree_mod_log_insert_move(lower, slot + 1, slot,
+					nritems - slot);
+			BUG_ON(ret < 0);
+		}
 		memmove_extent_buffer(lower,
 			      btrfs_node_key_ptr_offset(slot + 1),
 			      btrfs_node_key_ptr_offset(slot),
 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
 	}
 	if (level) {
-		ret = tree_mod_log_insert_key(fs_info, lower, slot,
-					      MOD_LOG_KEY_ADD, GFP_NOFS);
+		ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
+				GFP_NOFS);
 		BUG_ON(ret < 0);
 	}
 	btrfs_set_node_key(lower, key, slot);
@@ -4911,17 +4879,19 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 
 	nritems = btrfs_header_nritems(parent);
 	if (slot != nritems - 1) {
-		if (level)
-			tree_mod_log_eb_move(fs_info, parent, slot,
-					     slot + 1, nritems - slot - 1);
+		if (level) {
+			ret = tree_mod_log_insert_move(parent, slot, slot + 1,
+					nritems - slot - 1);
+			BUG_ON(ret < 0);
+		}
 		memmove_extent_buffer(parent,
 			      btrfs_node_key_ptr_offset(slot),
 			      btrfs_node_key_ptr_offset(slot + 1),
 			      sizeof(struct btrfs_key_ptr) *
 			      (nritems - slot - 1));
 	} else if (level) {
-		ret = tree_mod_log_insert_key(fs_info, parent, slot,
-					      MOD_LOG_KEY_REMOVE, GFP_NOFS);
+		ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
+				GFP_NOFS);
 		BUG_ON(ret < 0);
 	}
 
@@ -5145,9 +5115,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
  * into min_key, so you can call btrfs_search_slot with cow=1 on the
  * key and get a writable path.
  *
- * This does lock as it descends, and path->keep_locks should be set
- * to 1 by the caller.
- *
  * This honors path->lowest_level to prevent descent past a given level
  * of the tree.
  *
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index da308774b8a4..0eb55825862a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -40,6 +40,7 @@
 #include <linux/sizes.h>
 #include <linux/dynamic_debug.h>
 #include <linux/refcount.h>
+#include <linux/crc32c.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
@@ -65,6 +66,8 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAX_LEVEL 8
 
+#define BTRFS_OLDEST_GENERATION	0ULL
+
 #define BTRFS_COMPAT_EXTENT_TREE_V0
 
 /*
@@ -86,9 +89,9 @@ struct btrfs_ordered_sum;
  */
 #define BTRFS_LINK_MAX 65535U
 
+/* four bytes for CRC32 */
 static const int btrfs_csum_sizes[] = { 4 };
 
-/* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
 
 /* ioprio of readahead is set to idle */
@@ -98,6 +101,7 @@ static const int btrfs_csum_sizes[] = { 4 };
 
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
+
 /*
  * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
  */
@@ -381,8 +385,9 @@ struct btrfs_dev_replace {
 
 /* For raid type sysfs entries */
 struct raid_kobject {
-	int raid_type;
+	u64 flags;
 	struct kobject kobj;
+	struct list_head list;
 };
 
 struct btrfs_space_info {
@@ -707,7 +712,6 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_LOG_RECOVERING			4
 #define BTRFS_FS_OPEN				5
 #define BTRFS_FS_QUOTA_ENABLED			6
-#define BTRFS_FS_QUOTA_ENABLING			7
 #define BTRFS_FS_UPDATE_UUID_TREE_GEN		9
 #define BTRFS_FS_CREATING_FREE_SPACE_TREE	10
 #define BTRFS_FS_BTREE_ERR			11
@@ -788,7 +792,7 @@ struct btrfs_fs_info {
 	unsigned long pending_changes;
 	unsigned long compress_type:4;
 	unsigned int compress_level;
-	int commit_interval;
+	u32 commit_interval;
 	/*
 	 * It is a suggestive number, the read side is safe even it gets a
 	 * wrong number because we will write out the data into a regular
@@ -877,7 +881,6 @@ struct btrfs_fs_info {
 	struct rb_root tree_mod_log;
 
 	atomic_t async_delalloc_pages;
-	atomic_t open_ioctl_trans;
 
 	/*
 	 * this is used to protect the following list -- ordered_roots.
@@ -935,9 +938,11 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *extent_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
-	int thread_pool_size;
+	u32 thread_pool_size;
 
 	struct kobject *space_info_kobj;
+	struct list_head pending_raid_kobjs;
+	spinlock_t pending_raid_kobjs_lock; /* uncontended */
 
 	u64 total_pinned;
 
@@ -952,9 +957,9 @@ struct btrfs_fs_info {
 	struct btrfs_fs_devices *fs_devices;
 
 	/*
-	 * the space_info list is almost entirely read only.  It only changes
-	 * when we add a new raid type to the FS, and that happens
-	 * very rarely.  RCU is used to protect it.
+	 * The space_info list is effectively read only after initial
+	 * setup.  It is populated at mount time and cleaned up after
+	 * all block groups are removed.  RCU is used to protect it.
 	 */
 	struct list_head space_info;
 
@@ -993,8 +998,8 @@ struct btrfs_fs_info {
 	struct btrfs_balance_control *balance_ctl;
 	wait_queue_head_t balance_wait_q;
 
-	unsigned data_chunk_allocations;
-	unsigned metadata_ratio;
+	u32 data_chunk_allocations;
+	u32 metadata_ratio;
 
 	void *bdev_holder;
 
@@ -1260,12 +1265,13 @@ struct btrfs_root {
 	struct btrfs_subvolume_writers *subv_writers;
 	atomic_t will_be_snapshotted;
 
-	/* For qgroup metadata space reserve */
-	atomic64_t qgroup_meta_rsv;
+	/* For qgroup metadata reserved space */
+	spinlock_t qgroup_meta_rsv_lock;
+	u64 qgroup_meta_rsv_pertrans;
+	u64 qgroup_meta_rsv_prealloc;
 };
 
 struct btrfs_file_private {
-	struct btrfs_trans_handle *trans;
 	void *filldir_buf;
 };
 
@@ -2554,6 +2560,20 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
 	((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
 	btrfs_item_offset_nr(leaf, slot)))
 
+static inline u64 btrfs_name_hash(const char *name, int len)
+{
+       return crc32c((u32)~1, name, len);
+}
+
+/*
+ * Figure the key offset of an extended inode ref
+ */
+static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
+                                   int len)
+{
+       return (u64) crc32c(parent_objectid, name, len);
+}
+
 static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 {
 	return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
@@ -2608,7 +2628,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
 void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, unsigned long count);
+			   unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
 				 unsigned long count, u64 transid, int wait);
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
@@ -2628,7 +2648,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 u64 bytenr);
 void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
-int get_block_group_index(struct btrfs_block_group_cache *cache);
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 					     struct btrfs_root *root,
 					     u64 parent, u64 root_objectid,
@@ -2668,15 +2687,13 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
 int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
 				       u64 start, u64 len);
 void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info);
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
 			 u64 root_objectid, u64 owner, u64 offset);
 
-int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info);
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_fs_info *fs_info);
 int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -2688,6 +2705,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 			   struct btrfs_fs_info *fs_info, u64 bytes_used,
 			   u64 type, u64 chunk_offset, u64 size);
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
 				const u64 chunk_offset);
@@ -2697,8 +2715,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
 void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
-void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info);
+void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
 u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info);
 u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info);
 u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info);
@@ -2730,11 +2747,10 @@ int btrfs_check_data_free_space(struct inode *inode,
 void btrfs_free_reserved_data_space(struct inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len);
 void btrfs_delalloc_release_space(struct inode *inode,
-			struct extent_changeset *reserved, u64 start, u64 len);
+				  struct extent_changeset *reserved,
+				  u64 start, u64 len, bool qgroup_free);
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
 					    u64 len);
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info);
 void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_inode *inode);
@@ -2745,10 +2761,12 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+				    bool qgroup_free);
 
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				     bool qgroup_free);
 int btrfs_delalloc_reserve_space(struct inode *inode,
 			struct extent_changeset **reserved, u64 start, u64 len);
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
@@ -2792,7 +2810,6 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
-int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
 void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
@@ -3195,8 +3212,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
 int __init btrfs_init_cachep(void);
-void btrfs_destroy_cachep(void);
-long btrfs_ioctl_trans_end(struct file *file);
+void __cold btrfs_destroy_cachep(void);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			 struct btrfs_root *root, int *was_new);
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
@@ -3246,7 +3262,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
-void btrfs_auto_defrag_exit(void);
+void __cold btrfs_auto_defrag_exit(void);
 int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 			   struct btrfs_inode *inode);
 int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
@@ -3281,25 +3297,23 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 
 /* sysfs.c */
 int __init btrfs_init_sysfs(void);
-void btrfs_exit_sysfs(void);
+void __cold btrfs_exit_sysfs(void);
 int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
 void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
 
-/* xattr.c */
-ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-
 /* super.c */
 int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			unsigned long new_flags);
 int btrfs_sync_fs(struct super_block *sb, int wait);
 
-static inline __printf(2, 3)
+static inline __printf(2, 3) __cold
 void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 {
 }
 
 #ifdef CONFIG_PRINTK
 __printf(2, 3)
+__cold
 void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
 #else
 #define btrfs_printk(fs_info, fmt, args...) \
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0530f6f2e4ba..86ec2edc05e8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -23,6 +23,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "ctree.h"
+#include "qgroup.h"
 
 #define BTRFS_DELAYED_WRITEBACK		512
 #define BTRFS_DELAYED_BACKGROUND	128
@@ -42,7 +43,7 @@ int __init btrfs_delayed_inode_init(void)
 	return 0;
 }
 
-void btrfs_delayed_inode_exit(void)
+void __cold btrfs_delayed_inode_exit(void)
 {
 	kmem_cache_destroy(delayed_node_cache);
 }
@@ -552,11 +553,12 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
 }
 
 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
-					       struct btrfs_fs_info *fs_info,
+					       struct btrfs_root *root,
 					       struct btrfs_delayed_item *item)
 {
 	struct btrfs_block_rsv *src_rsv;
 	struct btrfs_block_rsv *dst_rsv;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 num_bytes;
 	int ret;
 
@@ -578,15 +580,17 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-static void btrfs_delayed_item_release_metadata(struct btrfs_fs_info *fs_info,
+static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 						struct btrfs_delayed_item *item)
 {
 	struct btrfs_block_rsv *rsv;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 
 	if (!item->bytes_reserved)
 		return;
 
 	rsv = &fs_info->delayed_block_rsv;
+	btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
 	trace_btrfs_space_reservation(fs_info, "delayed_item",
 				      item->key.objectid, item->bytes_reserved,
 				      0);
@@ -611,6 +615,9 @@ static int btrfs_delayed_inode_reserve_metadata(
 
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+	if (ret < 0)
+		return ret;
 	/*
 	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
 	 * which doesn't reserve space for speed.  This is a problem since we
@@ -630,8 +637,10 @@ static int btrfs_delayed_inode_reserve_metadata(
 		 * EAGAIN to make us stop the transaction we have, so return
 		 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
 		 */
-		if (ret == -EAGAIN)
+		if (ret == -EAGAIN) {
 			ret = -ENOSPC;
+			btrfs_qgroup_free_meta_prealloc(root, num_bytes);
+		}
 		if (!ret) {
 			node->bytes_reserved = num_bytes;
 			trace_btrfs_space_reservation(fs_info,
@@ -653,7 +662,8 @@ static int btrfs_delayed_inode_reserve_metadata(
 }
 
 static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
-						struct btrfs_delayed_node *node)
+						struct btrfs_delayed_node *node,
+						bool qgroup_free)
 {
 	struct btrfs_block_rsv *rsv;
 
@@ -665,6 +675,12 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
 				      node->inode_id, node->bytes_reserved, 0);
 	btrfs_block_rsv_release(fs_info, rsv,
 				node->bytes_reserved);
+	if (qgroup_free)
+		btrfs_qgroup_free_meta_prealloc(node->root,
+				node->bytes_reserved);
+	else
+		btrfs_qgroup_convert_reserved_meta(node->root,
+				node->bytes_reserved);
 	node->bytes_reserved = 0;
 }
 
@@ -766,7 +782,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
 				    curr->data_len);
 		slot++;
 
-		btrfs_delayed_item_release_metadata(fs_info, curr);
+		btrfs_delayed_item_release_metadata(root, curr);
 
 		list_del(&curr->tree_list);
 		btrfs_release_delayed_item(curr);
@@ -788,7 +804,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 				     struct btrfs_path *path,
 				     struct btrfs_delayed_item *delayed_item)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *leaf;
 	char *ptr;
 	int ret;
@@ -806,7 +821,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 			    delayed_item->data_len);
 	btrfs_mark_buffer_dirty(leaf);
 
-	btrfs_delayed_item_release_metadata(fs_info, delayed_item);
+	btrfs_delayed_item_release_metadata(root, delayed_item);
 	return 0;
 }
 
@@ -858,7 +873,6 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 				    struct btrfs_path *path,
 				    struct btrfs_delayed_item *item)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_delayed_item *curr, *next;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -908,7 +922,7 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 		goto out;
 
 	list_for_each_entry_safe(curr, next, &head, tree_list) {
-		btrfs_delayed_item_release_metadata(fs_info, curr);
+		btrfs_delayed_item_release_metadata(root, curr);
 		list_del(&curr->tree_list);
 		btrfs_release_delayed_item(curr);
 	}
@@ -1051,7 +1065,7 @@ out:
 no_iref:
 	btrfs_release_path(path);
 err_out:
-	btrfs_delayed_inode_release_metadata(fs_info, node);
+	btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
 	btrfs_release_delayed_inode(node);
 
 	return ret;
@@ -1115,9 +1129,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
  * Returns < 0 on error and returns with an aborted transaction with any
  * outstanding delayed items cleaned up.
  */
-static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info, int nr)
+static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_root *delayed_root;
 	struct btrfs_delayed_node *curr_node, *prev_node;
 	struct btrfs_path *path;
@@ -1162,16 +1176,14 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info)
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans)
 {
-	return __btrfs_run_delayed_items(trans, fs_info, -1);
+	return __btrfs_run_delayed_items(trans, -1);
 }
 
-int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info, int nr)
+int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
 {
-	return __btrfs_run_delayed_items(trans, fs_info, nr);
+	return __btrfs_run_delayed_items(trans, nr);
 }
 
 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
@@ -1443,7 +1455,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 	btrfs_set_stack_dir_type(dir_item, type);
 	memcpy((char *)(dir_item + 1), name, name_len);
 
-	ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, delayed_item);
+	ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, delayed_item);
 	/*
 	 * we have reserved enough space when we start a new transaction,
 	 * so reserving metadata failure is impossible
@@ -1480,7 +1492,7 @@ static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
 		return 1;
 	}
 
-	btrfs_delayed_item_release_metadata(fs_info, item);
+	btrfs_delayed_item_release_metadata(node->root, item);
 	btrfs_release_delayed_item(item);
 	mutex_unlock(&node->mutex);
 	return 0;
@@ -1515,7 +1527,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
 
 	item->key = item_key;
 
-	ret = btrfs_delayed_item_reserve_metadata(trans, fs_info, item);
+	ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
 	/*
 	 * we have reserved enough space when we start a new transaction,
 	 * so reserving metadata failure is impossible.
@@ -1880,7 +1892,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 	mutex_lock(&delayed_node->mutex);
 	curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
 	while (curr_item) {
-		btrfs_delayed_item_release_metadata(fs_info, curr_item);
+		btrfs_delayed_item_release_metadata(root, curr_item);
 		prev_item = curr_item;
 		curr_item = __btrfs_next_delayed_item(prev_item);
 		btrfs_release_delayed_item(prev_item);
@@ -1888,7 +1900,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 
 	curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
 	while (curr_item) {
-		btrfs_delayed_item_release_metadata(fs_info, curr_item);
+		btrfs_delayed_item_release_metadata(root, curr_item);
 		prev_item = curr_item;
 		curr_item = __btrfs_next_delayed_item(prev_item);
 		btrfs_release_delayed_item(prev_item);
@@ -1898,7 +1910,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 		btrfs_release_delayed_iref(delayed_node);
 
 	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
-		btrfs_delayed_inode_release_metadata(fs_info, delayed_node);
+		btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
 		btrfs_release_delayed_inode(delayed_node);
 	}
 	mutex_unlock(&delayed_node->mutex);
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index c4189d495934..100a91e26b55 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -111,10 +111,8 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
 
 int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode);
 
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
-			    struct btrfs_fs_info *fs_info);
-int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info, int nr);
+int btrfs_run_delayed_items(struct btrfs_trans_handle *trans);
+int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr);
 
 void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info);
 
@@ -151,7 +149,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 
 /* for init */
 int __init btrfs_delayed_inode_init(void);
-void btrfs_delayed_inode_exit(void);
+void __cold btrfs_delayed_inode_exit(void);
 
 /* for debugging */
 void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 7ab5e0128f0c..2677257c149d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -216,7 +216,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	assert_spin_locked(&delayed_refs->lock);
+	lockdep_assert_held(&delayed_refs->lock);
 	if (mutex_trylock(&head->mutex))
 		return 0;
 
@@ -239,7 +239,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 				    struct btrfs_delayed_ref_head *head,
 				    struct btrfs_delayed_ref_node *ref)
 {
-	assert_spin_locked(&head->lock);
+	lockdep_assert_held(&head->lock);
 	rb_erase(&ref->ref_node, &head->ref_tree);
 	RB_CLEAR_NODE(&ref->ref_node);
 	if (!list_empty(&ref->add_list))
@@ -307,7 +307,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 	struct rb_node *node;
 	u64 seq = 0;
 
-	assert_spin_locked(&head->lock);
+	lockdep_assert_held(&head->lock);
 
 	if (RB_EMPTY_ROOT(&head->ref_tree))
 		return;
@@ -930,7 +930,7 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt
 	return find_ref_head(&delayed_refs->href_root, bytenr, 0);
 }
 
-void btrfs_delayed_ref_exit(void)
+void __cold btrfs_delayed_ref_exit(void)
 {
 	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
 	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c4f625e5a691..9e3e5aff0937 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -204,7 +204,7 @@ extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
 extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
 
 int __init btrfs_delayed_ref_init(void);
-void btrfs_delayed_ref_exit(void);
+void __cold btrfs_delayed_ref_exit(void);
 
 static inline struct btrfs_delayed_extent_op *
 btrfs_alloc_delayed_extent_op(void)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7efbc4d1128b..0d203633bb96 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -44,7 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
 						struct btrfs_fs_info *fs_info,
 						struct btrfs_device *srcdev,
 						struct btrfs_device *tgtdev);
-static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
 static int btrfs_dev_replace_kthread(void *data);
 static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
 
@@ -174,8 +173,14 @@ no_valid_dev_replace_entry_found:
 			}
 			set_bit(BTRFS_DEV_STATE_REPLACE_TGT,
 				&dev_replace->tgtdev->dev_state);
-			btrfs_init_dev_replace_tgtdev_for_resume(fs_info,
-				dev_replace->tgtdev);
+
+			WARN_ON(fs_info->fs_devices->rw_devices == 0);
+			dev_replace->tgtdev->io_width = fs_info->sectorsize;
+			dev_replace->tgtdev->io_align = fs_info->sectorsize;
+			dev_replace->tgtdev->sector_size = fs_info->sectorsize;
+			dev_replace->tgtdev->fs_info = fs_info;
+			set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
+				&dev_replace->tgtdev->dev_state);
 		}
 		break;
 	}
@@ -200,13 +205,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
 	struct btrfs_dev_replace_item *ptr;
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-	btrfs_dev_replace_lock(dev_replace, 0);
+	btrfs_dev_replace_read_lock(dev_replace);
 	if (!dev_replace->is_valid ||
 	    !dev_replace->item_needs_writeback) {
-		btrfs_dev_replace_unlock(dev_replace, 0);
+		btrfs_dev_replace_read_unlock(dev_replace);
 		return 0;
 	}
-	btrfs_dev_replace_unlock(dev_replace, 0);
+	btrfs_dev_replace_read_unlock(dev_replace);
 
 	key.objectid = 0;
 	key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +269,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
 	ptr = btrfs_item_ptr(eb, path->slots[0],
 			     struct btrfs_dev_replace_item);
 
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	if (dev_replace->srcdev)
 		btrfs_set_dev_replace_src_devid(eb, ptr,
 			dev_replace->srcdev->devid);
@@ -287,7 +292,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
 	btrfs_set_dev_replace_cursor_right(eb, ptr,
 		dev_replace->cursor_right);
 	dev_replace->item_needs_writeback = 0;
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 
 	btrfs_mark_buffer_dirty(eb);
 
@@ -307,7 +312,7 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
 
 static char* btrfs_dev_name(struct btrfs_device *device)
 {
-	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+	if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
 		return "<missing disk>";
 	else
 		return rcu_str_deref(device->name);
@@ -352,7 +357,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 		return PTR_ERR(trans);
 	}
 
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -390,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 	dev_replace->item_needs_writeback = 1;
 	atomic64_set(&dev_replace->num_write_errors, 0);
 	atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 
 	ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
 	if (ret)
@@ -402,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
-		btrfs_dev_replace_lock(dev_replace, 1);
+		btrfs_dev_replace_write_lock(dev_replace);
 		goto leave;
 	}
 
@@ -426,7 +431,7 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 leave:
 	dev_replace->srcdev = NULL;
 	dev_replace->tgtdev = NULL;
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 	btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
 	return ret;
 }
@@ -493,18 +498,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	/* don't allow cancel or unmount to disturb the finishing procedure */
 	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
 
-	btrfs_dev_replace_lock(dev_replace, 0);
+	btrfs_dev_replace_read_lock(dev_replace);
 	/* was the operation canceled, or is it finished? */
 	if (dev_replace->replace_state !=
 	    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
-		btrfs_dev_replace_unlock(dev_replace, 0);
+		btrfs_dev_replace_read_unlock(dev_replace);
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 		return 0;
 	}
 
 	tgt_device = dev_replace->tgtdev;
 	src_device = dev_replace->srcdev;
-	btrfs_dev_replace_unlock(dev_replace, 0);
+	btrfs_dev_replace_read_unlock(dev_replace);
 
 	/*
 	 * flush all outstanding I/O and inode extent mappings before the
@@ -529,7 +534,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	/* keep away write_all_supers() during the finishing procedure */
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	mutex_lock(&fs_info->chunk_mutex);
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	dev_replace->replace_state =
 		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
 			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -549,7 +554,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				 btrfs_dev_name(src_device),
 				 src_device->devid,
 				 rcu_str_deref(tgt_device->name), scrub_ret);
-		btrfs_dev_replace_unlock(dev_replace, 1);
+		btrfs_dev_replace_write_unlock(dev_replace);
 		mutex_unlock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		mutex_unlock(&uuid_mutex);
@@ -586,7 +591,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
 	fs_info->fs_devices->rw_devices++;
 
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 
 	btrfs_rm_dev_replace_blocked(fs_info);
 
@@ -679,7 +684,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-	btrfs_dev_replace_lock(dev_replace, 0);
+	btrfs_dev_replace_read_lock(dev_replace);
 	/* even if !dev_replace_is_valid, the values are good enough for
 	 * the replace_status ioctl */
 	args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -691,41 +696,36 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
 	args->status.num_uncorrectable_read_errors =
 		atomic64_read(&dev_replace->num_uncorrectable_read_errors);
 	args->status.progress_1000 = btrfs_dev_replace_progress(fs_info);
-	btrfs_dev_replace_unlock(dev_replace, 0);
+	btrfs_dev_replace_read_unlock(dev_replace);
 }
 
-int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
-			     struct btrfs_ioctl_dev_replace_args *args)
-{
-	args->result = __btrfs_dev_replace_cancel(fs_info);
-	return 0;
-}
-
-static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
+int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 	struct btrfs_device *tgt_device = NULL;
+	struct btrfs_device *src_device = NULL;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = fs_info->tree_root;
-	u64 result;
+	int result;
 	int ret;
 
 	if (sb_rdonly(fs_info->sb))
 		return -EROFS;
 
 	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
 		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
-		btrfs_dev_replace_unlock(dev_replace, 1);
+		btrfs_dev_replace_write_unlock(dev_replace);
 		goto leave;
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
 		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
 		tgt_device = dev_replace->tgtdev;
+		src_device = dev_replace->srcdev;
 		dev_replace->tgtdev = NULL;
 		dev_replace->srcdev = NULL;
 		break;
@@ -733,7 +733,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
 	dev_replace->time_stopped = get_seconds();
 	dev_replace->item_needs_writeback = 1;
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 	btrfs_scrub_cancel(fs_info);
 
 	trans = btrfs_start_transaction(root, 0);
@@ -743,6 +743,12 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 	}
 	ret = btrfs_commit_transaction(trans);
 	WARN_ON(ret);
+
+	btrfs_info_in_rcu(fs_info,
+		"dev_replace from %s (devid %llu) to %s canceled",
+		btrfs_dev_name(src_device), src_device->devid,
+		btrfs_dev_name(tgt_device));
+
 	if (tgt_device)
 		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
 
@@ -756,7 +762,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
 	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -772,7 +778,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
 		break;
 	}
 
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 }
 
@@ -782,12 +788,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
 	struct task_struct *task;
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 
-	btrfs_dev_replace_lock(dev_replace, 1);
+	btrfs_dev_replace_write_lock(dev_replace);
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
-		btrfs_dev_replace_unlock(dev_replace, 1);
+		btrfs_dev_replace_write_unlock(dev_replace);
 		return 0;
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
 		break;
@@ -801,10 +807,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
 			   "cannot continue dev_replace, tgtdev is missing");
 		btrfs_info(fs_info,
 			   "you may cancel the operation after 'mount -o degraded'");
-		btrfs_dev_replace_unlock(dev_replace, 1);
+		btrfs_dev_replace_write_unlock(dev_replace);
 		return 0;
 	}
-	btrfs_dev_replace_unlock(dev_replace, 1);
+	btrfs_dev_replace_write_unlock(dev_replace);
 
 	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
 	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
@@ -873,37 +879,37 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
 	return 1;
 }
 
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
+void btrfs_dev_replace_read_lock(struct btrfs_dev_replace *dev_replace)
 {
-	if (rw == 1) {
-		/* write */
-again:
-		wait_event(dev_replace->read_lock_wq,
-			   atomic_read(&dev_replace->blocking_readers) == 0);
-		write_lock(&dev_replace->lock);
-		if (atomic_read(&dev_replace->blocking_readers)) {
-			write_unlock(&dev_replace->lock);
-			goto again;
-		}
-	} else {
-		read_lock(&dev_replace->lock);
-		atomic_inc(&dev_replace->read_locks);
-	}
+	read_lock(&dev_replace->lock);
+	atomic_inc(&dev_replace->read_locks);
+}
+
+void btrfs_dev_replace_read_unlock(struct btrfs_dev_replace *dev_replace)
+{
+	ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+	atomic_dec(&dev_replace->read_locks);
+	read_unlock(&dev_replace->lock);
 }
 
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
+void btrfs_dev_replace_write_lock(struct btrfs_dev_replace *dev_replace)
 {
-	if (rw == 1) {
-		/* write */
-		ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+again:
+	wait_event(dev_replace->read_lock_wq,
+		   atomic_read(&dev_replace->blocking_readers) == 0);
+	write_lock(&dev_replace->lock);
+	if (atomic_read(&dev_replace->blocking_readers)) {
 		write_unlock(&dev_replace->lock);
-	} else {
-		ASSERT(atomic_read(&dev_replace->read_locks) > 0);
-		atomic_dec(&dev_replace->read_locks);
-		read_unlock(&dev_replace->lock);
+		goto again;
 	}
 }
 
+void btrfs_dev_replace_write_unlock(struct btrfs_dev_replace *dev_replace)
+{
+	ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+	write_unlock(&dev_replace->lock);
+}
+
 /* inc blocking cnt and release read lock */
 void btrfs_dev_replace_set_lock_blocking(
 					struct btrfs_dev_replace *dev_replace)
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index f94a76844ae7..8566a02ef222 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -32,13 +32,14 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 		int read_src);
 void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
 			      struct btrfs_ioctl_dev_replace_args *args);
-int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
-			     struct btrfs_ioctl_dev_replace_args *args);
+int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
 void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
 int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
 int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_read_lock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_read_unlock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_write_lock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_write_unlock(struct btrfs_dev_replace *dev_replace);
 void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
 void btrfs_dev_replace_clear_lock_blocking(
 					struct btrfs_dev_replace *dev_replace);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index cbe421605cd5..29e967b2c667 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -18,7 +18,6 @@
 
 #include "ctree.h"
 #include "disk-io.h"
-#include "hash.h"
 #include "transaction.h"
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 21f34ad0d411..07b5e6f7df67 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,10 +31,10 @@
 #include <linux/uuid.h>
 #include <linux/semaphore.h>
 #include <linux/error-injection.h>
+#include <linux/crc32c.h>
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
-#include "hash.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
@@ -110,7 +110,7 @@ int __init btrfs_end_io_wq_init(void)
 	return 0;
 }
 
-void btrfs_end_io_wq_exit(void)
+void __cold btrfs_end_io_wq_exit(void)
 {
 	kmem_cache_destroy(btrfs_end_io_wq_cache);
 }
@@ -124,8 +124,8 @@ struct async_submit_bio {
 	void *private_data;
 	struct btrfs_fs_info *fs_info;
 	struct bio *bio;
-	extent_submit_bio_hook_t *submit_bio_start;
-	extent_submit_bio_hook_t *submit_bio_done;
+	extent_submit_bio_start_t *submit_bio_start;
+	extent_submit_bio_done_t *submit_bio_done;
 	int mirror_num;
 	unsigned long bio_flags;
 	/*
@@ -270,7 +270,7 @@ out:
 
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
 {
-	return btrfs_crc32c(seed, data, len);
+	return crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, u8 *result)
@@ -403,8 +403,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
 
 	if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
 		u32 crc = ~(u32)0;
-		const int csum_size = sizeof(crc);
-		char result[csum_size];
+		char result[sizeof(crc)];
 
 		/*
 		 * The super_block structure does not span the whole
@@ -415,7 +414,7 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
 				crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
 		btrfs_csum_final(crc, result);
 
-		if (memcmp(raw_disk_sb, result, csum_size))
+		if (memcmp(raw_disk_sb, result, sizeof(result)))
 			ret = 1;
 	}
 
@@ -428,13 +427,59 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static int verify_level_key(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *eb, int level,
+			    struct btrfs_key *first_key)
+{
+	int found_level;
+	struct btrfs_key found_key;
+	int ret;
+
+	found_level = btrfs_header_level(eb);
+	if (found_level != level) {
+#ifdef CONFIG_BTRFS_DEBUG
+		WARN_ON(1);
+		btrfs_err(fs_info,
+"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
+			  eb->start, level, found_level);
+#endif
+		return -EIO;
+	}
+
+	if (!first_key)
+		return 0;
+
+	if (found_level)
+		btrfs_node_key_to_cpu(eb, &found_key, 0);
+	else
+		btrfs_item_key_to_cpu(eb, &found_key, 0);
+	ret = btrfs_comp_cpu_keys(first_key, &found_key);
+
+#ifdef CONFIG_BTRFS_DEBUG
+	if (ret) {
+		WARN_ON(1);
+		btrfs_err(fs_info,
+"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
+			  eb->start, first_key->objectid, first_key->type,
+			  first_key->offset, found_key.objectid,
+			  found_key.type, found_key.offset);
+	}
+#endif
+	return ret;
+}
+
 /*
  * helper to read a given tree block, doing retries as required when
  * the checksums don't match and we have alternate mirrors to try.
+ *
+ * @parent_transid:	expected transid, skip check if 0
+ * @level:		expected level, mandatory check
+ * @first_key:		expected key of first slot, skip check if NULL
  */
 static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
 					  struct extent_buffer *eb,
-					  u64 parent_transid)
+					  u64 parent_transid, int level,
+					  struct btrfs_key *first_key)
 {
 	struct extent_io_tree *io_tree;
 	int failed = 0;
@@ -449,11 +494,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
 		ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
 					       mirror_num);
 		if (!ret) {
-			if (!verify_parent_transid(io_tree, eb,
+			if (verify_parent_transid(io_tree, eb,
 						   parent_transid, 0))
-				break;
-			else
 				ret = -EIO;
+			else if (verify_level_key(fs_info, eb, level,
+						  first_key))
+				ret = -EUCLEAN;
+			else
+				break;
 		}
 
 		/*
@@ -461,7 +509,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
 		 * there is no reason to read the other copies, they won't be
 		 * any less wrong.
 		 */
-		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
+		    ret == -EUCLEAN)
 			break;
 
 		num_copies = btrfs_num_copies(fs_info,
@@ -602,12 +651,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	 * that we don't try and read the other copies of this block, just
 	 * return -EIO.
 	 */
-	if (found_level == 0 && btrfs_check_leaf_full(root, eb)) {
+	if (found_level == 0 && btrfs_check_leaf_full(fs_info, eb)) {
 		set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 		ret = -EIO;
 	}
 
-	if (found_level > 0 && btrfs_check_node(root, eb))
+	if (found_level > 0 && btrfs_check_node(fs_info, eb))
 		ret = -EIO;
 
 	if (!ret)
@@ -710,14 +759,6 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 	return 0;
 }
 
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
-{
-	unsigned long limit = min_t(unsigned long,
-				    info->thread_pool_size,
-				    info->fs_devices->open_devices);
-	return 256 * limit;
-}
-
 static void run_one_async_start(struct btrfs_work *work)
 {
 	struct async_submit_bio *async;
@@ -725,7 +766,6 @@ static void run_one_async_start(struct btrfs_work *work)
 
 	async = container_of(work, struct  async_submit_bio, work);
 	ret = async->submit_bio_start(async->private_data, async->bio,
-				      async->mirror_num, async->bio_flags,
 				      async->bio_offset);
 	if (ret)
 		async->status = ret;
@@ -744,8 +784,7 @@ static void run_one_async_done(struct btrfs_work *work)
 		return;
 	}
 
-	async->submit_bio_done(async->private_data, async->bio, async->mirror_num,
-			       async->bio_flags, async->bio_offset);
+	async->submit_bio_done(async->private_data, async->bio, async->mirror_num);
 }
 
 static void run_one_async_free(struct btrfs_work *work)
@@ -759,8 +798,8 @@ static void run_one_async_free(struct btrfs_work *work)
 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset, void *private_data,
-				 extent_submit_bio_hook_t *submit_bio_start,
-				 extent_submit_bio_hook_t *submit_bio_done)
+				 extent_submit_bio_start_t *submit_bio_start,
+				 extent_submit_bio_done_t *submit_bio_done)
 {
 	struct async_submit_bio *async;
 
@@ -807,8 +846,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
 	return errno_to_blk_status(ret);
 }
 
-static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio,
-					     int mirror_num, unsigned long bio_flags,
+static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
 					     u64 bio_offset)
 {
 	/*
@@ -818,9 +856,8 @@ static blk_status_t __btree_submit_bio_start(void *private_data, struct bio *bio
 	return btree_csum_one_bio(bio);
 }
 
-static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio,
-					    int mirror_num, unsigned long bio_flags,
-					    u64 bio_offset)
+static blk_status_t btree_submit_bio_done(void *private_data, struct bio *bio,
+					    int mirror_num)
 {
 	struct inode *inode = private_data;
 	blk_status_t ret;
@@ -879,8 +916,8 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio,
 		 */
 		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, 0,
 					  bio_offset, private_data,
-					  __btree_submit_bio_start,
-					  __btree_submit_bio_done);
+					  btree_submit_bio_start,
+					  btree_submit_bio_done);
 	}
 
 	if (ret)
@@ -1062,8 +1099,17 @@ void btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
 			        buf->start, buf->start + buf->len - 1);
 }
 
+/*
+ * Read tree block at logical address @bytenr and do variant basic but critical
+ * verification.
+ *
+ * @parent_transid:	expected transid of this tree block, skip check if 0
+ * @level:		expected level, mandatory check
+ * @first_key:		expected key in slot 0, skip check if NULL
+ */
 struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
-				      u64 parent_transid)
+				      u64 parent_transid, int level,
+				      struct btrfs_key *first_key)
 {
 	struct extent_buffer *buf = NULL;
 	int ret;
@@ -1072,7 +1118,8 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
 	if (IS_ERR(buf))
 		return buf;
 
-	ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
+	ret = btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+					     level, first_key);
 	if (ret) {
 		free_extent_buffer(buf);
 		return ERR_PTR(ret);
@@ -1108,7 +1155,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
 	if (!writers)
 		return ERR_PTR(-ENOMEM);
 
-	ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL);
+	ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS);
 	if (ret < 0) {
 		kfree(writers);
 		return ERR_PTR(ret);
@@ -1160,6 +1207,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	spin_lock_init(&root->accounting_lock);
 	spin_lock_init(&root->log_extents_lock[0]);
 	spin_lock_init(&root->log_extents_lock[1]);
+	spin_lock_init(&root->qgroup_meta_rsv_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
 	mutex_init(&root->ordered_extent_mutex);
@@ -1176,7 +1224,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	atomic_set(&root->orphan_inodes, 0);
 	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshotted, 0);
-	atomic64_set(&root->qgroup_meta_rsv, 0);
 	root->log_transid = 0;
 	root->log_transid_committed = -1;
 	root->last_log_commit = 0;
@@ -1401,6 +1448,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
 	struct btrfs_path *path;
 	u64 generation;
 	int ret;
+	int level;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -1423,9 +1471,10 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
 	}
 
 	generation = btrfs_root_generation(&root->root_item);
+	level = btrfs_root_level(&root->root_item);
 	root->node = read_tree_block(fs_info,
 				     btrfs_root_bytenr(&root->root_item),
-				     generation);
+				     generation, level, NULL);
 	if (IS_ERR(root->node)) {
 		ret = PTR_ERR(root->node);
 		goto find_fail;
@@ -1808,12 +1857,10 @@ sleep:
 		if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
 				      &fs_info->fs_state)))
 			btrfs_cleanup_transaction(fs_info);
-		set_current_state(TASK_INTERRUPTIBLE);
 		if (!kthread_should_stop() &&
 				(!btrfs_transaction_blocked(fs_info) ||
 				 cannot_commit))
-			schedule_timeout(delay);
-		__set_current_state(TASK_RUNNING);
+			schedule_timeout_interruptible(delay);
 	} while (!kthread_should_stop());
 	return 0;
 }
@@ -2183,7 +2230,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
 static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
 		struct btrfs_fs_devices *fs_devices)
 {
-	int max_active = fs_info->thread_pool_size;
+	u32 max_active = fs_info->thread_pool_size;
 	unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
 
 	fs_info->workers =
@@ -2276,6 +2323,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 	struct btrfs_root *log_tree_root;
 	struct btrfs_super_block *disk_super = fs_info->super_copy;
 	u64 bytenr = btrfs_super_log_root(disk_super);
+	int level = btrfs_super_log_root_level(disk_super);
 
 	if (fs_devices->rw_devices == 0) {
 		btrfs_warn(fs_info, "log replay required on RO media");
@@ -2289,7 +2337,8 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 	__setup_root(log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
 
 	log_tree_root->node = read_tree_block(fs_info, bytenr,
-					      fs_info->generation + 1);
+					      fs_info->generation + 1,
+					      level, NULL);
 	if (IS_ERR(log_tree_root->node)) {
 		btrfs_warn(fs_info, "failed to read log tree");
 		ret = PTR_ERR(log_tree_root->node);
@@ -2334,23 +2383,29 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	location.offset = 0;
 
 	root = btrfs_read_tree_root(tree_root, &location);
-	if (IS_ERR(root))
-		return PTR_ERR(root);
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
+		goto out;
+	}
 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 	fs_info->extent_root = root;
 
 	location.objectid = BTRFS_DEV_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
-	if (IS_ERR(root))
-		return PTR_ERR(root);
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
+		goto out;
+	}
 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 	fs_info->dev_root = root;
 	btrfs_init_devices_late(fs_info);
 
 	location.objectid = BTRFS_CSUM_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
-	if (IS_ERR(root))
-		return PTR_ERR(root);
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
+		goto out;
+	}
 	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 	fs_info->csum_root = root;
 
@@ -2367,7 +2422,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
 		if (ret != -ENOENT)
-			return ret;
+			goto out;
 	} else {
 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 		fs_info->uuid_root = root;
@@ -2376,13 +2431,19 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
 		location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
 		root = btrfs_read_tree_root(tree_root, &location);
-		if (IS_ERR(root))
-			return PTR_ERR(root);
+		if (IS_ERR(root)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 		fs_info->free_space_root = root;
 	}
 
 	return 0;
+out:
+	btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d",
+		   location.objectid, ret);
+	return ret;
 }
 
 int open_ctree(struct super_block *sb,
@@ -2404,8 +2465,8 @@ int open_ctree(struct super_block *sb,
 	int err = -EINVAL;
 	int num_backups_tried = 0;
 	int backup_index = 0;
-	int max_active;
 	int clear_free_space_tree = 0;
+	int level;
 
 	tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
 	chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2447,6 +2508,8 @@ int open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
 	INIT_LIST_HEAD(&fs_info->delalloc_roots);
 	INIT_LIST_HEAD(&fs_info->caching_block_groups);
+	INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
+	spin_lock_init(&fs_info->pending_raid_kobjs_lock);
 	spin_lock_init(&fs_info->delalloc_root_lock);
 	spin_lock_init(&fs_info->trans_lock);
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
@@ -2713,8 +2776,6 @@ int open_ctree(struct super_block *sb,
 		goto fail_alloc;
 	}
 
-	max_active = fs_info->thread_pool_size;
-
 	ret = btrfs_init_workqueues(fs_info, fs_devices);
 	if (ret) {
 		err = ret;
@@ -2741,12 +2802,13 @@ int open_ctree(struct super_block *sb,
 	}
 
 	generation = btrfs_super_chunk_root_generation(disk_super);
+	level = btrfs_super_chunk_root_level(disk_super);
 
 	__setup_root(chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
 
 	chunk_root->node = read_tree_block(fs_info,
 					   btrfs_super_chunk_root(disk_super),
-					   generation);
+					   generation, level, NULL);
 	if (IS_ERR(chunk_root->node) ||
 	    !extent_buffer_uptodate(chunk_root->node)) {
 		btrfs_err(fs_info, "failed to read chunk root");
@@ -2768,10 +2830,10 @@ int open_ctree(struct super_block *sb,
 	}
 
 	/*
-	 * keep the device that is marked to be the target device for the
-	 * dev_replace procedure
+	 * Keep the devid that is marked to be the target device for the
+	 * device replace procedure
 	 */
-	btrfs_close_extra_devices(fs_devices, 0);
+	btrfs_free_extra_devids(fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
 		btrfs_err(fs_info, "failed to read devices");
@@ -2780,10 +2842,11 @@ int open_ctree(struct super_block *sb,
 
 retry_root_backup:
 	generation = btrfs_super_generation(disk_super);
+	level = btrfs_super_root_level(disk_super);
 
 	tree_root->node = read_tree_block(fs_info,
 					  btrfs_super_root(disk_super),
-					  generation);
+					  generation, level, NULL);
 	if (IS_ERR(tree_root->node) ||
 	    !extent_buffer_uptodate(tree_root->node)) {
 		btrfs_warn(fs_info, "failed to read tree root");
@@ -2834,7 +2897,7 @@ retry_root_backup:
 		goto fail_block_groups;
 	}
 
-	btrfs_close_extra_devices(fs_devices, 1);
+	btrfs_free_extra_devids(fs_devices, 1);
 
 	ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
 	if (ret) {
@@ -2953,6 +3016,7 @@ retry_root_backup:
 	fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
 	if (IS_ERR(fs_info->fs_root)) {
 		err = PTR_ERR(fs_info->fs_root);
+		btrfs_warn(fs_info, "failed to read fs tree: %d", err);
 		goto fail_qgroup;
 	}
 
@@ -3290,6 +3354,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 	struct buffer_head *bh;
 	int i;
 	int errors = 0;
+	bool primary_failed = false;
 	u64 bytenr;
 
 	if (max_mirrors == 0)
@@ -3306,11 +3371,16 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 				      BTRFS_SUPER_INFO_SIZE);
 		if (!bh) {
 			errors++;
+			if (i == 0)
+				primary_failed = true;
 			continue;
 		}
 		wait_on_buffer(bh);
-		if (!buffer_uptodate(bh))
+		if (!buffer_uptodate(bh)) {
 			errors++;
+			if (i == 0)
+				primary_failed = true;
+		}
 
 		/* drop our reference */
 		brelse(bh);
@@ -3319,6 +3389,13 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
 		brelse(bh);
 	}
 
+	/* log error, force error return */
+	if (primary_failed) {
+		btrfs_err(device->fs_info, "error writing primary super block to device %llu",
+			  device->devid);
+		return -1;
+	}
+
 	return errors < i ? 0 : -1;
 }
 
@@ -3851,7 +3928,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	 * So here we should only check item pointers, not item data.
 	 */
 	if (btrfs_header_level(buf) == 0 &&
-	    btrfs_check_leaf_relaxed(root, buf)) {
+	    btrfs_check_leaf_relaxed(fs_info, buf)) {
 		btrfs_print_leaf(buf);
 		ASSERT(0);
 	}
@@ -3890,12 +3967,14 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
 	__btrfs_btree_balance_dirty(fs_info, 0);
 }
 
-int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
+int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
+		      struct btrfs_key *first_key)
 {
 	struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
-	return btree_read_extent_buffer_pages(fs_info, buf, parent_transid);
+	return btree_read_extent_buffer_pages(fs_info, buf, parent_transid,
+					      level, first_key);
 }
 
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
@@ -4314,11 +4393,6 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
 		cache = list_first_entry(&cur_trans->dirty_bgs,
 					 struct btrfs_block_group_cache,
 					 dirty_list);
-		if (!cache) {
-			btrfs_err(fs_info, "orphan block group dirty_bgs list");
-			spin_unlock(&cur_trans->dirty_bgs_lock);
-			return;
-		}
 
 		if (!list_empty(&cache->io_list)) {
 			spin_unlock(&cur_trans->dirty_bgs_lock);
@@ -4338,14 +4412,14 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
 	}
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
+	/*
+	 * Refer to the definition of io_bgs member for details why it's safe
+	 * to use it without any locking
+	 */
 	while (!list_empty(&cur_trans->io_bgs)) {
 		cache = list_first_entry(&cur_trans->io_bgs,
 					 struct btrfs_block_group_cache,
 					 io_list);
-		if (!cache) {
-			btrfs_err(fs_info, "orphan block group on io_bgs list");
-			return;
-		}
 
 		list_del_init(&cache->io_list);
 		spin_lock(&cache->lock);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 301151a50ac1..453ea9f5d4e9 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,8 +52,9 @@ static inline u64 btrfs_sb_offset(int mirror)
 struct btrfs_device;
 struct btrfs_fs_devices;
 
-struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info,
-				      u64 bytenr, u64 parent_transid);
+struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+				      u64 parent_transid, int level,
+				      struct btrfs_key *first_key);
 void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
 int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
 			 int mirror_num, struct extent_buffer **eb);
@@ -123,7 +124,8 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
 			  int atomic);
-int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
+		      struct btrfs_key *first_key);
 u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, u8 *result);
 blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
@@ -131,9 +133,8 @@ blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 			int mirror_num, unsigned long bio_flags,
 			u64 bio_offset, void *private_data,
-			extent_submit_bio_hook_t *submit_bio_start,
-			extent_submit_bio_hook_t *submit_bio_done);
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
+			extent_submit_bio_start_t *submit_bio_start,
+			extent_submit_bio_done_t *submit_bio_done);
 int btrfs_write_tree_block(struct extent_buffer *buf);
 void btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
 int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
@@ -154,7 +155,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
 		int create);
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
 int __init btrfs_end_io_wq_init(void);
-void btrfs_end_io_wq_exit(void);
+void __cold btrfs_end_io_wq_exit(void);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_init_lockdep(void);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e0460d7b5622..e08d0d45af4f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -27,7 +27,7 @@
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
 #include <linux/lockdep.h>
-#include "hash.h"
+#include <linux/crc32c.h>
 #include "tree-log.h"
 #include "disk-io.h"
 #include "print-tree.h"
@@ -535,13 +535,11 @@ static noinline void caching_thread(struct btrfs_work *work)
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct btrfs_root *extent_root;
 	int ret;
 
 	caching_ctl = container_of(work, struct btrfs_caching_control, work);
 	block_group = caching_ctl->block_group;
 	fs_info = block_group->fs_info;
-	extent_root = fs_info->extent_root;
 
 	mutex_lock(&caching_ctl->mutex);
 	down_read(&fs_info->commit_root_sem);
@@ -1203,11 +1201,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
 	__le64 lenum;
 
 	lenum = cpu_to_le64(root_objectid);
-	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
+	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(owner);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 	lenum = cpu_to_le64(offset);
-	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
+	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
 
 	return ((u64)high_crc << 31) ^ (u64)low_crc;
 }
@@ -2652,9 +2650,9 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
  */
 static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-					     struct btrfs_fs_info *fs_info,
 					     unsigned long nr)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
@@ -2994,7 +2992,7 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 	if (trans->transid > async->transid)
 		goto end;
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, async->count);
+	ret = btrfs_run_delayed_refs(trans, async->count);
 	if (ret)
 		async->error = ret;
 end:
@@ -3053,8 +3051,9 @@ int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
  * Returns <0 on error and aborts the transaction
  */
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info, unsigned long count)
+			   unsigned long count)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_head *head;
@@ -3078,7 +3077,7 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 	trans->can_flush_pending_bgs = false;
-	ret = __btrfs_run_delayed_refs(trans, fs_info, count);
+	ret = __btrfs_run_delayed_refs(trans, count);
 	if (ret < 0) {
 		btrfs_abort_transaction(trans, ret);
 		return ret;
@@ -3086,7 +3085,7 @@ again:
 
 	if (run_all) {
 		if (!list_empty(&trans->new_bgs))
-			btrfs_create_pending_block_groups(trans, fs_info);
+			btrfs_create_pending_block_groups(trans);
 
 		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->href_root);
@@ -3660,9 +3659,9 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
  * the commit latency by getting rid of the easy block groups while
  * we're still allowing others to join the commit.
  */
-int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info)
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
@@ -3686,7 +3685,7 @@ again:
 	 * make sure all the block groups on our dirty list actually
 	 * exist
 	 */
-	btrfs_create_pending_block_groups(trans, fs_info);
+	btrfs_create_pending_block_groups(trans);
 
 	if (!path) {
 		path = btrfs_alloc_path();
@@ -3741,8 +3740,9 @@ again:
 				should_put = 0;
 
 				/*
-				 * the cache_write_mutex is protecting
-				 * the io_list
+				 * The cache_write_mutex is protecting the
+				 * io_list, also refer to the definition of
+				 * btrfs_transaction::io_bgs for more details
 				 */
 				list_add_tail(&cache->io_list, io);
 			} else {
@@ -3800,7 +3800,7 @@ again:
 	 * go through delayed refs for all the stuff we've just kicked off
 	 * and then loop back (just once)
 	 */
-	ret = btrfs_run_delayed_refs(trans, fs_info, 0);
+	ret = btrfs_run_delayed_refs(trans, 0);
 	if (!ret && loops == 0) {
 		loops++;
 		spin_lock(&cur_trans->dirty_bgs_lock);
@@ -3882,7 +3882,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		cache_save_setup(cache, trans, path);
 
 		if (!ret)
-			ret = btrfs_run_delayed_refs(trans, fs_info,
+			ret = btrfs_run_delayed_refs(trans,
 						     (unsigned long) -1);
 
 		if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
@@ -3934,6 +3934,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&cur_trans->dirty_bgs_lock);
 
+	/*
+	 * Refer to the definition of io_bgs member for details why it's safe
+	 * to use it without any locking
+	 */
 	while (!list_empty(io)) {
 		cache = list_first_entry(io, struct btrfs_block_group_cache,
 					 io_list);
@@ -4332,8 +4336,7 @@ again:
 
 		/* commit the current transaction and try again */
 commit_trans:
-		if (need_commit &&
-		    !atomic_read(&fs_info->open_ioctl_trans)) {
+		if (need_commit) {
 			need_commit--;
 
 			if (need_commit > 0) {
@@ -4541,7 +4544,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
 	 * Needed because we can end up allocating a system chunk and for an
 	 * atomic and race free space reservation in the chunk block reserve.
 	 */
-	ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
+	lockdep_assert_held(&fs_info->chunk_mutex);
 
 	info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
 	spin_lock(&info->lock);
@@ -4602,11 +4605,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 
 	space_info = __find_space_info(fs_info, flags);
-	if (!space_info) {
-		ret = create_space_info(fs_info, flags, &space_info);
-		if (ret)
-			return ret;
-	}
+	ASSERT(space_info);
 
 again:
 	spin_lock(&space_info->lock);
@@ -4705,7 +4704,7 @@ out:
 	 */
 	if (trans->can_flush_pending_bgs &&
 	    trans->chunk_bytes_reserved >= (u64)SZ_2M) {
-		btrfs_create_pending_block_groups(trans, fs_info);
+		btrfs_create_pending_block_groups(trans);
 		btrfs_trans_release_chunk_metadata(trans);
 	}
 	return ret;
@@ -4826,7 +4825,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 	long time_left;
 	unsigned long nr_pages;
 	int loops;
-	enum btrfs_reserve_flush_enum flush;
 
 	/* Calc the number of the pages we need flush for space reservation */
 	items = calc_reclaim_items_nr(fs_info, to_reclaim);
@@ -4867,10 +4865,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
 			   atomic_read(&fs_info->async_delalloc_pages) <=
 			   (int)max_reclaim);
 skip_async:
-		if (!trans)
-			flush = BTRFS_RESERVE_FLUSH_ALL;
-		else
-			flush = BTRFS_RESERVE_NO_FLUSH;
 		spin_lock(&space_info->lock);
 		if (list_empty(&space_info->tickets) &&
 		    list_empty(&space_info->priority_tickets)) {
@@ -4993,7 +4987,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 			ret = PTR_ERR(trans);
 			break;
 		}
-		ret = btrfs_run_delayed_items_nr(trans, fs_info, nr);
+		ret = btrfs_run_delayed_items_nr(trans, nr);
 		btrfs_end_transaction(trans);
 		break;
 	case FLUSH_DELALLOC:
@@ -5388,10 +5382,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
 		    !block_rsv_use_bytes(global_rsv, orig_bytes))
 			ret = 0;
 	}
-	if (ret == -ENOSPC)
+	if (ret == -ENOSPC) {
 		trace_btrfs_space_reservation(fs_info, "space_info:enospc",
 					      block_rsv->space_info->flags,
 					      orig_bytes, 1);
+
+		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+			dump_space_info(fs_info, block_rsv->space_info,
+					orig_bytes, 0);
+	}
 	return ret;
 }
 
@@ -5760,6 +5759,9 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 	if (num_bytes == 0)
 		return 0;
 
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+	if (ret)
+		return ret;
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 0);
@@ -5772,11 +5774,15 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
 /**
  * btrfs_inode_rsv_release - release any excessive reservation.
  * @inode - the inode we need to release from.
+ * @qgroup_free - free or convert qgroup meta.
+ *   Unlike normal operation, qgroup meta reservation needs to know if we are
+ *   freeing qgroup reservation or just converting it into per-trans.  Normally
+ *   @qgroup_free is true for error handling, and false for normal release.
  *
  * This is the same as btrfs_block_rsv_release, except that it handles the
  * tracepoint for the reservation.
  */
-static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
+static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
@@ -5792,6 +5798,10 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode)
 	if (released > 0)
 		trace_btrfs_space_reservation(fs_info, "delalloc",
 					      btrfs_ino(inode), released, 0);
+	if (qgroup_free)
+		btrfs_qgroup_free_meta_prealloc(inode->root, released);
+	else
+		btrfs_qgroup_convert_reserved_meta(inode->root, released);
 }
 
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
@@ -5892,24 +5902,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 	WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
 }
 
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info)
-{
-	if (!trans->block_rsv) {
-		ASSERT(!trans->bytes_reserved);
-		return;
-	}
-
-	if (!trans->bytes_reserved)
-		return;
-
-	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
-	trace_btrfs_space_reservation(fs_info, "transaction",
-				      trans->transid, trans->bytes_reserved, 0);
-	btrfs_block_rsv_release(fs_info, trans->block_rsv,
-				trans->bytes_reserved);
-	trans->bytes_reserved = 0;
-}
 
 /*
  * To be called after all the new block groups attached to the transaction
@@ -5951,7 +5943,7 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
 	 */
 	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
 
-	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), 
+	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
 			num_bytes, 1);
 	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
 }
@@ -5995,7 +5987,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
 		/* One for parent inode, two for dir entries */
 		num_bytes = 3 * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta(root, num_bytes, true);
+		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
 		if (ret)
 			return ret;
 	} else {
@@ -6014,7 +6006,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
 	if (ret && *qgroup_reserved)
-		btrfs_qgroup_free_meta(root, *qgroup_reserved);
+		btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
 
 	return ret;
 }
@@ -6051,7 +6043,6 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
 	unsigned nr_extents;
 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
 	int ret = 0;
@@ -6068,13 +6059,13 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_free_space_inode(inode)) {
 		flush = BTRFS_RESERVE_NO_FLUSH;
 		delalloc_lock = false;
-	} else if (current->journal_info) {
-		flush = BTRFS_RESERVE_FLUSH_LIMIT;
-	}
+	} else {
+		if (current->journal_info)
+			flush = BTRFS_RESERVE_FLUSH_LIMIT;
 
-	if (flush != BTRFS_RESERVE_NO_FLUSH &&
-	    btrfs_transaction_in_commit(fs_info))
-		schedule_timeout(1);
+		if (btrfs_transaction_in_commit(fs_info))
+			schedule_timeout(1);
+	}
 
 	if (delalloc_lock)
 		mutex_lock(&inode->delalloc_mutex);
@@ -6089,19 +6080,9 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-		ret = btrfs_qgroup_reserve_meta(root,
-				nr_extents * fs_info->nodesize, true);
-		if (ret)
-			goto out_fail;
-	}
-
 	ret = btrfs_inode_rsv_refill(inode, flush);
-	if (unlikely(ret)) {
-		btrfs_qgroup_free_meta(root,
-				       nr_extents * fs_info->nodesize);
+	if (unlikely(ret))
 		goto out_fail;
-	}
 
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
@@ -6115,7 +6096,7 @@ out_fail:
 	btrfs_calculate_inode_block_rsv_size(fs_info, inode);
 	spin_unlock(&inode->lock);
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, true);
 	if (delalloc_lock)
 		mutex_unlock(&inode->delalloc_mutex);
 	return ret;
@@ -6125,12 +6106,14 @@ out_fail:
  * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
  * @inode: the inode to release the reservation for.
  * @num_bytes: the number of bytes we are releasing.
+ * @qgroup_free: free qgroup reservation or convert it to per-trans reservation
  *
  * This will release the metadata reservation for an inode.  This can be called
  * once we complete IO for a given set of bytes to release their metadata
  * reservations, or on error for the same reason.
  */
-void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
+				     bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 
@@ -6143,13 +6126,14 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
  * btrfs_delalloc_release_extents - release our outstanding_extents
  * @inode: the inode to balance the reservation for.
  * @num_bytes: the number of bytes we originally reserved with
+ * @qgroup_free: do we need to free qgroup meta reservation or convert them.
  *
  * When we reserve space we increase outstanding_extents for the extents we may
  * add.  Once we've set the range as delalloc or created our ordered extents we
@@ -6157,7 +6141,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
  * temporarily tracked outstanding_extents.  This _must_ be used in conjunction
  * with btrfs_delalloc_reserve_metadata.
  */
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
+				    bool qgroup_free)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
 	unsigned num_extents;
@@ -6171,7 +6156,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
 	if (btrfs_is_testing(fs_info))
 		return;
 
-	btrfs_inode_rsv_release(inode);
+	btrfs_inode_rsv_release(inode, qgroup_free);
 }
 
 /**
@@ -6227,9 +6212,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode,
  */
 void btrfs_delalloc_release_space(struct inode *inode,
 				  struct extent_changeset *reserved,
-				  u64 start, u64 len)
+				  u64 start, u64 len, bool qgroup_free)
 {
-	btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
+	btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
 	btrfs_free_reserved_data_space(inode, reserved, start, len);
 }
 
@@ -6783,9 +6768,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info)
+int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct list_head *deleted_bgs;
 	struct extent_io_tree *unpin;
@@ -7351,29 +7336,6 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 	return ret;
 }
 
-int __get_raid_index(u64 flags)
-{
-	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		return BTRFS_RAID_RAID10;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		return BTRFS_RAID_RAID1;
-	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		return BTRFS_RAID_DUP;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		return BTRFS_RAID_RAID0;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
-		return BTRFS_RAID_RAID5;
-	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
-		return BTRFS_RAID_RAID6;
-
-	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
-}
-
-int get_block_group_index(struct btrfs_block_group_cache *cache)
-{
-	return __get_raid_index(cache->flags);
-}
-
 static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10]	= "raid10",
 	[BTRFS_RAID_RAID1]	= "raid1",
@@ -7488,7 +7450,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	u64 empty_cluster = 0;
 	struct btrfs_space_info *space_info;
 	int loop = 0;
-	int index = __get_raid_index(flags);
+	int index = btrfs_bg_flags_to_raid_index(flags);
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
 	bool use_cluster = true;
@@ -7574,7 +7536,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 				btrfs_put_block_group(block_group);
 				up_read(&space_info->groups_sem);
 			} else {
-				index = get_block_group_index(block_group);
+				index = btrfs_bg_flags_to_raid_index(
+						block_group->flags);
 				btrfs_lock_block_group(block_group, delalloc);
 				goto have_block_group;
 			}
@@ -7584,7 +7547,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	}
 search:
 	have_caching_bg = false;
-	if (index == 0 || index == __get_raid_index(flags))
+	if (index == 0 || index == btrfs_bg_flags_to_raid_index(flags))
 		full_search = true;
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups[index],
@@ -7842,7 +7805,8 @@ checks:
 loop:
 		failed_cluster_refill = false;
 		failed_alloc = false;
-		BUG_ON(index != get_block_group_index(block_group));
+		BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
+		       index);
 		btrfs_release_block_group(block_group, delalloc);
 		cond_resched();
 	}
@@ -7996,6 +7960,51 @@ again:
 	up_read(&info->groups_sem);
 }
 
+/*
+ * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
+ *			  hole that is at least as big as @num_bytes.
+ *
+ * @root           -	The root that will contain this extent
+ *
+ * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
+ *			is used for accounting purposes. This value differs
+ *			from @num_bytes only in the case of compressed extents.
+ *
+ * @num_bytes      -	Number of bytes to allocate on-disk.
+ *
+ * @min_alloc_size -	Indicates the minimum amount of space that the
+ *			allocator should try to satisfy. In some cases
+ *			@num_bytes may be larger than what is required and if
+ *			the filesystem is fragmented then allocation fails.
+ *			However, the presence of @min_alloc_size gives a
+ *			chance to try and satisfy the smaller allocation.
+ *
+ * @empty_size     -	A hint that you plan on doing more COW. This is the
+ *			size in bytes the allocator should try to find free
+ *			next to the block it returns.  This is just a hint and
+ *			may be ignored by the allocator.
+ *
+ * @hint_byte      -	Hint to the allocator to start searching above the byte
+ *			address passed. It might be ignored.
+ *
+ * @ins            -	This key is modified to record the found hole. It will
+ *			have the following values:
+ *			ins->objectid == start position
+ *			ins->flags = BTRFS_EXTENT_ITEM_KEY
+ *			ins->offset == the size of the hole.
+ *
+ * @is_data        -	Boolean flag indicating whether an extent is
+ *			allocated for data (true) or metadata (false)
+ *
+ * @delalloc       -	Boolean flag indicating whether this allocation is for
+ *			delalloc or not. If 'true' data_rwsem of block groups
+ *			is going to be acquired.
+ *
+ *
+ * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
+ * case -ENOSPC is returned then @ins->offset will contain the size of the
+ * largest available hole the allocator managed to find.
+ */
 int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
@@ -8699,6 +8708,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	u64 parent;
 	u32 blocksize;
 	struct btrfs_key key;
+	struct btrfs_key first_key;
 	struct extent_buffer *next;
 	int level = wc->level;
 	int reada = 0;
@@ -8719,6 +8729,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	}
 
 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+	btrfs_node_key_to_cpu(path->nodes[level], &first_key,
+			      path->slots[level]);
 	blocksize = fs_info->nodesize;
 
 	next = find_extent_buffer(fs_info, bytenr);
@@ -8783,7 +8795,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	if (!next) {
 		if (reada && level == 1)
 			reada_walk_down(trans, root, wc, path);
-		next = read_tree_block(fs_info, bytenr, generation);
+		next = read_tree_block(fs_info, bytenr, generation, level - 1,
+				       &first_key);
 		if (IS_ERR(next)) {
 			return PTR_ERR(next);
 		} else if (!extent_buffer_uptodate(next)) {
@@ -9648,7 +9661,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 	 */
 	target = get_restripe_target(fs_info, block_group->flags);
 	if (target) {
-		index = __get_raid_index(extended_to_chunk(target));
+		index = btrfs_bg_flags_to_raid_index(extended_to_chunk(target));
 	} else {
 		/*
 		 * this is just a balance, so if we were marked as full
@@ -9662,7 +9675,7 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
 			goto out;
 		}
 
-		index = get_block_group_index(block_group);
+		index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	}
 
 	if (index == BTRFS_RAID_RAID10) {
@@ -9911,10 +9924,40 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	return 0;
 }
 
+/* link_block_group will queue up kobjects to add when we're reclaim-safe */
+void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_space_info *space_info;
+	struct raid_kobject *rkobj;
+	LIST_HEAD(list);
+	int index;
+	int ret = 0;
+
+	spin_lock(&fs_info->pending_raid_kobjs_lock);
+	list_splice_init(&fs_info->pending_raid_kobjs, &list);
+	spin_unlock(&fs_info->pending_raid_kobjs_lock);
+
+	list_for_each_entry(rkobj, &list, list) {
+		space_info = __find_space_info(fs_info, rkobj->flags);
+		index = btrfs_bg_flags_to_raid_index(rkobj->flags);
+
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+				  "%s", get_raid_name(index));
+		if (ret) {
+			kobject_put(&rkobj->kobj);
+			break;
+		}
+	}
+	if (ret)
+		btrfs_warn(fs_info,
+			   "failed to add kobject for block cache, ignoring");
+}
+
 static void link_block_group(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
-	int index = get_block_group_index(cache);
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	int index = btrfs_bg_flags_to_raid_index(cache->flags);
 	bool first = false;
 
 	down_write(&space_info->groups_sem);
@@ -9924,27 +9967,20 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct raid_kobject *rkobj;
-		int ret;
-
-		rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
-		if (!rkobj)
-			goto out_err;
-		rkobj->raid_type = index;
-		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
-				  "%s", get_raid_name(index));
-		if (ret) {
-			kobject_put(&rkobj->kobj);
-			goto out_err;
+		struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		if (!rkobj) {
+			btrfs_warn(cache->fs_info,
+				"couldn't alloc memory for raid level kobject");
+			return;
 		}
+		rkobj->flags = cache->flags;
+		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+
+		spin_lock(&fs_info->pending_raid_kobjs_lock);
+		list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
+		spin_unlock(&fs_info->pending_raid_kobjs_lock);
 		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
-
-	return;
-out_err:
-	btrfs_warn(cache->fs_info,
-		   "failed to add kobject for block cache, ignoring");
 }
 
 static struct btrfs_block_group_cache *
@@ -10160,6 +10196,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 			inc_block_group_ro(cache, 1);
 	}
 
+	btrfs_add_raid_kobjects(info);
 	init_global_block_rsv(info);
 	ret = 0;
 error:
@@ -10167,9 +10204,9 @@ error:
 	return ret;
 }
 
-void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
-				       struct btrfs_fs_info *fs_info)
+void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group_cache *block_group, *tmp;
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	struct btrfs_block_group_item item;
@@ -10254,15 +10291,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	 * with its ->space_info set.
 	 */
 	cache->space_info = __find_space_info(fs_info, cache->flags);
-	if (!cache->space_info) {
-		ret = create_space_info(fs_info, cache->flags,
-				       &cache->space_info);
-		if (ret) {
-			btrfs_remove_free_space_cache(cache);
-			btrfs_put_block_group(cache);
-			return ret;
-		}
-	}
+	ASSERT(cache->space_info);
 
 	ret = btrfs_add_block_group_cache(fs_info, cache);
 	if (ret) {
@@ -10334,7 +10363,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 				  block_group->key.offset);
 
 	memcpy(&key, &block_group->key, sizeof(key));
-	index = get_block_group_index(block_group);
+	index = btrfs_bg_flags_to_raid_index(block_group->flags);
 	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
 				  BTRFS_BLOCK_GROUP_RAID1 |
 				  BTRFS_BLOCK_GROUP_RAID10))
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dfeb74a0be77..47a8fe9d22e8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -76,8 +76,8 @@ void btrfs_leak_debug_check(void)
 
 	while (!list_empty(&buffers)) {
 		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-		pr_err("BTRFS: buffer leak start %llu len %lu refs %d\n",
-		       eb->start, eb->len, atomic_read(&eb->refs));
+		pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
+		       eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
 		list_del(&eb->leak_list);
 		kmem_cache_free(extent_buffer_cache, eb);
 	}
@@ -119,23 +119,22 @@ struct extent_page_data {
 	unsigned int sync_io:1;
 };
 
-static void add_extent_changeset(struct extent_state *state, unsigned bits,
+static int add_extent_changeset(struct extent_state *state, unsigned bits,
 				 struct extent_changeset *changeset,
 				 int set)
 {
 	int ret;
 
 	if (!changeset)
-		return;
+		return 0;
 	if (set && (state->state & bits) == bits)
-		return;
+		return 0;
 	if (!set && (state->state & bits) == 0)
-		return;
+		return 0;
 	changeset->bytes_changed += state->end - state->start + 1;
 	ret = ulist_add(&changeset->range_changed, state->start, state->end,
 			GFP_ATOMIC);
-	/* ENOMEM */
-	BUG_ON(ret < 0);
+	return ret;
 }
 
 static void flush_write_bio(struct extent_page_data *epd);
@@ -187,7 +186,7 @@ free_state_cache:
 	return -ENOMEM;
 }
 
-void extent_io_exit(void)
+void __cold extent_io_exit(void)
 {
 	btrfs_leak_debug_check();
 
@@ -527,6 +526,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 {
 	struct extent_state *next;
 	unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
+	int ret;
 
 	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
@@ -534,7 +534,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 		tree->dirty_bytes -= range;
 	}
 	clear_state_cb(tree, state, bits);
-	add_extent_changeset(state, bits_to_clear, changeset, 0);
+	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
+	BUG_ON(ret < 0);
 	state->state &= ~bits_to_clear;
 	if (wake)
 		wake_up(&state->wq);
@@ -805,13 +806,15 @@ static void set_state_bits(struct extent_io_tree *tree,
 			   unsigned *bits, struct extent_changeset *changeset)
 {
 	unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
+	int ret;
 
 	set_state_cb(tree, state, bits);
 	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
 		tree->dirty_bytes += range;
 	}
-	add_extent_changeset(state, bits_to_set, changeset, 1);
+	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
+	BUG_ON(ret < 0);
 	state->state |= bits_to_set;
 }
 
@@ -2744,20 +2747,21 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 	return blk_status_to_errno(ret);
 }
 
-static int merge_bio(struct extent_io_tree *tree, struct page *page,
-		     unsigned long offset, size_t size, struct bio *bio,
-		     unsigned long bio_flags)
-{
-	int ret = 0;
-	if (tree->ops)
-		ret = tree->ops->merge_bio_hook(page, offset, size, bio,
-						bio_flags);
-	return ret;
-
-}
-
 /*
  * @opf:	bio REQ_OP_* and REQ_* flags as one value
+ * @tree:	tree so we can call our merge_bio hook
+ * @wbc:	optional writeback control for io accounting
+ * @page:	page to add to the bio
+ * @pg_offset:	offset of the new bio or to check whether we are adding
+ *              a contiguous page to the previous one
+ * @size:	portion of page that we want to write
+ * @offset:	starting offset in the page
+ * @bdev:	attach newly created bios to this bdev
+ * @bio_ret:	must be valid pointer, newly allocated bio will be stored there
+ * @end_io_func:     end_io callback for new bio
+ * @mirror_num:	     desired mirror to read/write
+ * @prev_bio_flags:  flags of previous bio to see if we can merge the current one
+ * @bio_flags:	flags of the current bio to see if we can merge them
  */
 static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 			      struct writeback_control *wbc,
@@ -2773,21 +2777,27 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 {
 	int ret = 0;
 	struct bio *bio;
-	int contig = 0;
-	int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
 	size_t page_size = min_t(size_t, size, PAGE_SIZE);
 	sector_t sector = offset >> 9;
 
-	if (bio_ret && *bio_ret) {
+	ASSERT(bio_ret);
+
+	if (*bio_ret) {
+		bool contig;
+		bool can_merge = true;
+
 		bio = *bio_ret;
-		if (old_compressed)
+		if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
 			contig = bio->bi_iter.bi_sector == sector;
 		else
 			contig = bio_end_sector(bio) == sector;
 
-		if (prev_bio_flags != bio_flags || !contig ||
+		if (tree->ops && tree->ops->merge_bio_hook(page, offset,
+					page_size, bio, bio_flags))
+			can_merge = false;
+
+		if (prev_bio_flags != bio_flags || !contig || !can_merge ||
 		    force_bio_submit ||
-		    merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) ||
 		    bio_add_page(bio, page, page_size, pg_offset) < page_size) {
 			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
 			if (ret < 0) {
@@ -2813,10 +2823,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
 		wbc_account_io(wbc, page, page_size);
 	}
 
-	if (bio_ret)
-		*bio_ret = bio;
-	else
-		ret = submit_one_bio(bio, mirror_num, bio_flags);
+	*bio_ret = bio;
 
 	return ret;
 }
@@ -2886,8 +2893,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 {
 	struct inode *inode = page->mapping->host;
 	u64 start = page_offset(page);
-	u64 page_end = start + PAGE_SIZE - 1;
-	u64 end;
+	const u64 end = start + PAGE_SIZE - 1;
 	u64 cur = start;
 	u64 extent_offset;
 	u64 last_byte = i_size_read(inode);
@@ -2905,7 +2911,6 @@ static int __do_readpage(struct extent_io_tree *tree,
 
 	set_page_extent_mapped(page);
 
-	end = page_end;
 	if (!PageUptodate(page)) {
 		if (cleancache_get_page(page) == 0) {
 			BUG_ON(blocksize != PAGE_SIZE);
@@ -5230,11 +5235,6 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	}
 }
 
-int extent_buffer_uptodate(struct extent_buffer *eb)
-{
-	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-}
-
 int read_extent_buffer_pages(struct extent_io_tree *tree,
 			     struct extent_buffer *eb, int wait, int mirror_num)
 {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a7a850abd600..b77d84909863 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -83,8 +83,8 @@ static inline int le_test_bit(int nr, const u8 *addr)
 	return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
 }
 
-extern void le_bitmap_set(u8 *map, unsigned int start, int len);
-extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
+void le_bitmap_set(u8 *map, unsigned int start, int len);
+void le_bitmap_clear(u8 *map, unsigned int start, int len);
 
 struct extent_state;
 struct btrfs_root;
@@ -95,6 +95,13 @@ struct io_failure_record;
 typedef	blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio,
 				       int mirror_num, unsigned long bio_flags,
 				       u64 bio_offset);
+
+typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
+		struct bio *bio, u64 bio_offset);
+
+typedef blk_status_t (extent_submit_bio_done_t)(void *private_data,
+		struct bio *bio, int mirror_num);
+
 struct extent_io_ops {
 	/*
 	 * The following callbacks must be allways defined, the function
@@ -286,7 +293,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 			  get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
-void extent_io_exit(void);
+void __cold extent_io_exit(void);
 
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end,
@@ -455,6 +462,11 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
 	atomic_inc(&eb->refs);
 }
 
+static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+{
+	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+}
+
 int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
 			 unsigned long start, unsigned long len);
 void read_extent_buffer(const struct extent_buffer *eb, void *dst,
@@ -489,7 +501,6 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_buffer *eb);
 void set_extent_buffer_uptodate(struct extent_buffer *eb);
 void clear_extent_buffer_uptodate(struct extent_buffer *eb);
-int extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_under_io(struct extent_buffer *eb);
 int map_private_extent_buffer(const struct extent_buffer *eb,
 			      unsigned long offset, unsigned long min_len,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index d3bd02105d1c..53a0633c6ef7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -2,7 +2,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/hardirq.h>
 #include "ctree.h"
 #include "extent_map.h"
 #include "compression.h"
@@ -20,7 +19,7 @@ int __init extent_map_init(void)
 	return 0;
 }
 
-void extent_map_exit(void)
+void __cold extent_map_exit(void)
 {
 	kmem_cache_destroy(extent_map_cache);
 }
@@ -552,6 +551,9 @@ int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
 		ret = 0;
 
 		existing = search_extent_mapping(em_tree, start, len);
+
+		trace_btrfs_handle_em_exist(existing, em, start, len);
+
 		/*
 		 * existing will always be non-NULL, since there must be
 		 * extent causing the -EEXIST.
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index b29f77bc0732..f6f8ba114977 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -86,7 +86,7 @@ void replace_extent_mapping(struct extent_map_tree *tree,
 struct extent_map *alloc_extent_map(void);
 void free_extent_map(struct extent_map *em);
 int __init extent_map_init(void);
-void extent_map_exit(void);
+void __cold extent_map_exit(void);
 int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
 void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
 struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 41ab9073d1d4..f247300170e5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1691,7 +1691,7 @@ again:
 				    force_page_uptodate);
 		if (ret) {
 			btrfs_delalloc_release_extents(BTRFS_I(inode),
-						       reserve_bytes);
+						       reserve_bytes, true);
 			break;
 		}
 
@@ -1703,7 +1703,7 @@ again:
 			if (extents_locked == -EAGAIN)
 				goto again;
 			btrfs_delalloc_release_extents(BTRFS_I(inode),
-						       reserve_bytes);
+						       reserve_bytes, true);
 			ret = extents_locked;
 			break;
 		}
@@ -1738,7 +1738,7 @@ again:
 						fs_info->sb->s_blocksize_bits;
 			if (only_release_metadata) {
 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
-								release_bytes);
+							release_bytes, true);
 			} else {
 				u64 __pos;
 
@@ -1747,7 +1747,7 @@ again:
 					(dirty_pages << PAGE_SHIFT);
 				btrfs_delalloc_release_space(inode,
 						data_reserved, __pos,
-						release_bytes);
+						release_bytes, true);
 			}
 		}
 
@@ -1760,7 +1760,8 @@ again:
 		if (extents_locked)
 			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 					     lockstart, lockend, &cached_state);
-		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
+					       (ret != 0));
 		if (ret) {
 			btrfs_drop_pages(pages, num_pages);
 			break;
@@ -1800,11 +1801,11 @@ again:
 		if (only_release_metadata) {
 			btrfs_end_write_no_snapshotting(root);
 			btrfs_delalloc_release_metadata(BTRFS_I(inode),
-					release_bytes);
+					release_bytes, true);
 		} else {
 			btrfs_delalloc_release_space(inode, data_reserved,
 					round_down(pos, fs_info->sectorsize),
-					release_bytes);
+					release_bytes, true);
 		}
 	}
 
@@ -1997,8 +1998,6 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
 {
 	struct btrfs_file_private *private = filp->private_data;
 
-	if (private && private->trans)
-		btrfs_ioctl_trans_end(filp);
 	if (private && private->filldir_buf)
 		kfree(private->filldir_buf);
 	kfree(private);
@@ -2190,12 +2189,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	}
 
 	/*
-	 * ok we haven't committed the transaction yet, lets do a commit
-	 */
-	if (file->private_data)
-		btrfs_ioctl_trans_end(file);
-
-	/*
 	 * We use start here because we will need to wait on the IO to complete
 	 * in btrfs_sync_log, which could require joining a transaction (for
 	 * example checking cross references in the nocow path).  If we use join
@@ -2214,7 +2207,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	}
 	trans->sync = true;
 
-	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
+	ret = btrfs_log_dentry_safe(trans, dentry, start, end, &ctx);
 	if (ret < 0) {
 		/* Fallthrough and commit/free transaction. */
 		ret = 1;
@@ -2482,7 +2475,8 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
 		if ((!ordered ||
 		    (ordered->file_offset + ordered->len <= lockstart ||
 		     ordered->file_offset > lockend)) &&
-		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
+		     !filemap_range_has_page(inode->i_mapping,
+					     lockstart, lockend)) {
 			if (ordered)
 				btrfs_put_ordered_extent(ordered);
 			break;
@@ -3378,7 +3372,7 @@ const struct file_operations btrfs_file_operations = {
 	.dedupe_file_range = btrfs_dedupe_file_range,
 };
 
-void btrfs_auto_defrag_exit(void)
+void __cold btrfs_auto_defrag_exit(void)
 {
 	kmem_cache_destroy(btrfs_inode_defrag_cachep);
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a9f22ac50d6a..d0dde9e6afd7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3547,7 +3547,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 	if (ret) {
 		if (release_metadata)
 			btrfs_delalloc_release_metadata(BTRFS_I(inode),
-					inode->i_size);
+					inode->i_size, true);
 #ifdef DEBUG
 		btrfs_err(fs_info,
 			  "failed to write free ino cache for root %llu",
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index fe5e0324dca9..af36a6a971fe 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1071,7 +1071,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	path->reada = 1;
+	path->reada = READA_FORWARD;
 
 	path2 = btrfs_alloc_path();
 	if (!path2) {
@@ -1573,7 +1573,7 @@ int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
 	 */
 	path->skip_locking = 1;
 	path->search_commit_root = 1;
-	path->reada = 1;
+	path->reada = READA_FORWARD;
 
 	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
 	if (IS_ERR(info)) {
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
deleted file mode 100644
index baacc1866861..000000000000
--- a/fs/btrfs/hash.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <crypto/hash.h>
-#include <linux/err.h>
-#include "hash.h"
-
-static struct crypto_shash *tfm;
-
-int __init btrfs_hash_init(void)
-{
-	tfm = crypto_alloc_shash("crc32c", 0, 0);
-
-	return PTR_ERR_OR_ZERO(tfm);
-}
-
-const char* btrfs_crc32c_impl(void)
-{
-	return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
-}
-
-void btrfs_hash_exit(void)
-{
-	crypto_free_shash(tfm);
-}
-
-u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
-{
-	SHASH_DESC_ON_STACK(shash, tfm);
-	u32 *ctx = (u32 *)shash_desc_ctx(shash);
-	u32 retval;
-	int err;
-
-	shash->tfm = tfm;
-	shash->flags = 0;
-	*ctx = crc;
-
-	err = crypto_shash_update(shash, address, length);
-	BUG_ON(err);
-
-	retval = *ctx;
-	barrier_data(ctx);
-	return retval;
-}
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
deleted file mode 100644
index c3a2ec554361..000000000000
--- a/fs/btrfs/hash.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __HASH__
-#define __HASH__
-
-int __init btrfs_hash_init(void);
-
-void btrfs_hash_exit(void);
-const char* btrfs_crc32c_impl(void);
-
-u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
-
-static inline u64 btrfs_name_hash(const char *name, int len)
-{
-	return btrfs_crc32c((u32)~1, name, len);
-}
-
-/*
- * Figure the key offset of an extended inode ref
- */
-static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
-				    int len)
-{
-	return (u64) btrfs_crc32c(parent_objectid, name, len);
-}
-
-#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 65e1a76bf755..1d5631ef2738 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -18,7 +18,6 @@
 
 #include "ctree.h"
 #include "disk-io.h"
-#include "hash.h"
 #include "transaction.h"
 #include "print-tree.h"
 
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 022b19336fee..9409dcc7020d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -500,12 +500,12 @@ again:
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
 					      prealloc, prealloc, &alloc_hint);
 	if (ret) {
-		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
 		goto out_put;
 	}
 
 	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
 out_put:
 	iput(inode);
 out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f53470112670..1f091c2358a4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -58,7 +58,6 @@
 #include "free-space-cache.h"
 #include "inode-map.h"
 #include "backref.h"
-#include "hash.h"
 #include "props.h"
 #include "qgroup.h"
 #include "dedupe.h"
@@ -102,7 +101,7 @@ static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 };
 
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
-static int btrfs_truncate(struct inode *inode);
+static int btrfs_truncate(struct inode *inode, bool skip_writeback);
 static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 static noinline int cow_file_range(struct inode *inode,
 				   struct page *locked_page,
@@ -277,12 +276,12 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static noinline int cow_file_range_inline(struct btrfs_root *root,
-					  struct inode *inode, u64 start,
+static noinline int cow_file_range_inline(struct inode *inode, u64 start,
 					  u64 end, size_t compressed_size,
 					  int compress_type,
 					  struct page **compressed_pages)
 {
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_trans_handle *trans;
 	u64 isize = i_size_read(inode);
@@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode,
 					int *num_added)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 blocksize = fs_info->sectorsize;
 	u64 actual_end;
 	u64 isize = i_size_read(inode);
@@ -580,11 +578,11 @@ cont:
 			/* we didn't compress the entire range, try
 			 * to make an uncompressed inline extent.
 			 */
-			ret = cow_file_range_inline(root, inode, start, end,
-					    0, BTRFS_COMPRESS_NONE, NULL);
+			ret = cow_file_range_inline(inode, start, end, 0,
+						    BTRFS_COMPRESS_NONE, NULL);
 		} else {
 			/* try making a compressed inline extent */
-			ret = cow_file_range_inline(root, inode, start, end,
+			ret = cow_file_range_inline(inode, start, end,
 						    total_compressed,
 						    compress_type, pages);
 		}
@@ -961,7 +959,6 @@ static noinline int cow_file_range(struct inode *inode,
 	u64 alloc_hint = 0;
 	u64 num_bytes;
 	unsigned long ram_size;
-	u64 disk_num_bytes;
 	u64 cur_alloc_size = 0;
 	u64 blocksize = fs_info->sectorsize;
 	struct btrfs_key ins;
@@ -979,14 +976,14 @@ static noinline int cow_file_range(struct inode *inode,
 
 	num_bytes = ALIGN(end - start + 1, blocksize);
 	num_bytes = max(blocksize,  num_bytes);
-	disk_num_bytes = num_bytes;
+	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
 
 	inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
 
 	if (start == 0) {
 		/* lets try to make an inline extent */
-		ret = cow_file_range_inline(root, inode, start, end, 0,
-					BTRFS_COMPRESS_NONE, NULL);
+		ret = cow_file_range_inline(inode, start, end, 0,
+					    BTRFS_COMPRESS_NONE, NULL);
 		if (ret == 0) {
 			/*
 			 * We use DO_ACCOUNTING here because we need the
@@ -1010,15 +1007,12 @@ static noinline int cow_file_range(struct inode *inode,
 		}
 	}
 
-	BUG_ON(disk_num_bytes >
-	       btrfs_super_total_bytes(fs_info->super_copy));
-
 	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
 	btrfs_drop_extent_cache(BTRFS_I(inode), start,
 			start + num_bytes - 1, 0);
 
-	while (disk_num_bytes > 0) {
-		cur_alloc_size = disk_num_bytes;
+	while (num_bytes > 0) {
+		cur_alloc_size = num_bytes;
 		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 					   fs_info->sectorsize, 0, alloc_hint,
 					   &ins, 1, 1);
@@ -1082,11 +1076,10 @@ static noinline int cow_file_range(struct inode *inode,
 					     delalloc_end, locked_page,
 					     EXTENT_LOCKED | EXTENT_DELALLOC,
 					     page_ops);
-		if (disk_num_bytes < cur_alloc_size)
-			disk_num_bytes = 0;
+		if (num_bytes < cur_alloc_size)
+			num_bytes = 0;
 		else
-			disk_num_bytes -= cur_alloc_size;
-		num_bytes -= cur_alloc_size;
+			num_bytes -= cur_alloc_size;
 		alloc_hint = ins.objectid + ins.offset;
 		start += cur_alloc_size;
 		extent_reserved = false;
@@ -1262,6 +1255,8 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
 		list_del(&sums->list);
 		kfree(sums);
 	}
+	if (ret < 0)
+		return ret;
 	return 1;
 }
 
@@ -1394,10 +1389,23 @@ next_slot:
 				goto out_check;
 			if (btrfs_extent_readonly(fs_info, disk_bytenr))
 				goto out_check;
-			if (btrfs_cross_ref_exist(root, ino,
-						  found_key.offset -
-						  extent_offset, disk_bytenr))
+			ret = btrfs_cross_ref_exist(root, ino,
+						    found_key.offset -
+						    extent_offset, disk_bytenr);
+			if (ret) {
+				/*
+				 * ret could be -EIO if the above fails to read
+				 * metadata.
+				 */
+				if (ret < 0) {
+					if (cow_start != (u64)-1)
+						cur_offset = cow_start;
+					goto error;
+				}
+
+				WARN_ON_ONCE(nolock);
 				goto out_check;
+			}
 			disk_bytenr += extent_offset;
 			disk_bytenr += cur_offset - found_key.offset;
 			num_bytes = min(end + 1, extent_end) - cur_offset;
@@ -1415,10 +1423,22 @@ next_slot:
 			 * this ensure that csum for a given extent are
 			 * either valid or do not exist.
 			 */
-			if (csum_exist_in_range(fs_info, disk_bytenr,
-						num_bytes)) {
+			ret = csum_exist_in_range(fs_info, disk_bytenr,
+						  num_bytes);
+			if (ret) {
 				if (!nolock)
 					btrfs_end_write_no_snapshotting(root);
+
+				/*
+				 * ret could be -EIO if the above fails to read
+				 * metadata.
+				 */
+				if (ret < 0) {
+					if (cow_start != (u64)-1)
+						cur_offset = cow_start;
+					goto error;
+				}
+				WARN_ON_ONCE(nolock);
 				goto out_check;
 			}
 			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
@@ -1847,7 +1867,7 @@ static void btrfs_clear_bit_hook(void *private_data,
 		 */
 		if (*bits & EXTENT_CLEAR_META_RESV &&
 		    root != fs_info->tree_root)
-			btrfs_delalloc_release_metadata(inode, len);
+			btrfs_delalloc_release_metadata(inode, len, false);
 
 		/* For sanity tests. */
 		if (btrfs_is_testing(fs_info))
@@ -1921,8 +1941,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio,
-				    int mirror_num, unsigned long bio_flags,
+static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
 				    u64 bio_offset)
 {
 	struct inode *inode = private_data;
@@ -1941,9 +1960,8 @@ static blk_status_t __btrfs_submit_bio_start(void *private_data, struct bio *bio
  * At IO completion time the cums attached on the ordered extent record
  * are inserted into the btree
  */
-static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags,
-			  u64 bio_offset)
+static blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
+			  int mirror_num)
 {
 	struct inode *inode = private_data;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2015,8 +2033,8 @@ static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
 		/* we're doing a write, do the async checksumming */
 		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
 					  bio_offset, inode,
-					  __btrfs_submit_bio_start,
-					  __btrfs_submit_bio_done);
+					  btrfs_submit_bio_start,
+					  btrfs_submit_bio_done);
 		goto out;
 	} else if (!skip_sum) {
 		ret = btrfs_csum_one_bio(inode, bio, 0, 0);
@@ -2134,7 +2152,7 @@ again:
 
 	ClearPageChecked(page);
 	set_page_dirty(page);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
 out:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 			     &cached_state);
@@ -2754,12 +2772,10 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
 	struct sa_defrag_extent_backref *backref;
 	struct sa_defrag_extent_backref *prev = NULL;
 	struct inode *inode;
-	struct btrfs_root *root;
 	struct rb_node *node;
 	int ret;
 
 	inode = new->inode;
-	root = BTRFS_I(inode)->root;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -3247,6 +3263,16 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 				      start, (size_t)(end - start + 1));
 }
 
+/*
+ * btrfs_add_delayed_iput - perform a delayed iput on @inode
+ *
+ * @inode: The inode we want to perform iput on
+ *
+ * This function uses the generic vfs_inode::i_count to track whether we should
+ * just decrement it (in case it's > 1) or if this is the last iput then link
+ * the inode to the delayed iput machinery. Delayed iputs are processed at
+ * transaction commit time/superblock commit/cleaner kthread.
+ */
 void btrfs_add_delayed_iput(struct inode *inode)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -3256,12 +3282,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
 		return;
 
 	spin_lock(&fs_info->delayed_iput_lock);
-	if (binode->delayed_iput_count == 0) {
-		ASSERT(list_empty(&binode->delayed_iput));
-		list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
-	} else {
-		binode->delayed_iput_count++;
-	}
+	ASSERT(list_empty(&binode->delayed_iput));
+	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
 	spin_unlock(&fs_info->delayed_iput_lock);
 }
 
@@ -3274,13 +3296,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 
 		inode = list_first_entry(&fs_info->delayed_iputs,
 				struct btrfs_inode, delayed_iput);
-		if (inode->delayed_iput_count) {
-			inode->delayed_iput_count--;
-			list_move_tail(&inode->delayed_iput,
-					&fs_info->delayed_iputs);
-		} else {
-			list_del_init(&inode->delayed_iput);
-		}
+		list_del_init(&inode->delayed_iput);
 		spin_unlock(&fs_info->delayed_iput_lock);
 		iput(&inode->vfs_inode);
 		spin_lock(&fs_info->delayed_iput_lock);
@@ -3350,7 +3366,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = inode->root;
 	struct btrfs_block_rsv *block_rsv = NULL;
 	int reserve = 0;
-	int insert = 0;
+	bool insert = false;
 	int ret;
 
 	if (!root->orphan_block_rsv) {
@@ -3360,7 +3376,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 			return -ENOMEM;
 	}
 
+	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+			      &inode->runtime_flags))
+		insert = true;
+
+	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+			      &inode->runtime_flags))
+		reserve = 1;
+
 	spin_lock(&root->orphan_lock);
+	/* If someone has created ->orphan_block_rsv, be happy to use it. */
 	if (!root->orphan_block_rsv) {
 		root->orphan_block_rsv = block_rsv;
 	} else if (block_rsv) {
@@ -3368,26 +3393,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 		block_rsv = NULL;
 	}
 
-	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-			      &inode->runtime_flags)) {
-#if 0
-		/*
-		 * For proper ENOSPC handling, we should do orphan
-		 * cleanup when mounting. But this introduces backward
-		 * compatibility issue.
-		 */
-		if (!xchg(&root->orphan_item_inserted, 1))
-			insert = 2;
-		else
-			insert = 1;
-#endif
-		insert = 1;
+	if (insert)
 		atomic_inc(&root->orphan_inodes);
-	}
-
-	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
-			      &inode->runtime_flags))
-		reserve = 1;
 	spin_unlock(&root->orphan_lock);
 
 	/* grab metadata reservation from transaction handle */
@@ -3411,7 +3418,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 	}
 
 	/* insert an orphan item to track this unlinked/truncated file */
-	if (insert >= 1) {
+	if (insert) {
 		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
 		if (ret) {
 			if (reserve) {
@@ -3435,15 +3442,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 		ret = 0;
 	}
 
-	/* insert an orphan item to track subvolume contains orphan files */
-	if (insert >= 2) {
-		ret = btrfs_insert_orphan_item(trans, fs_info->tree_root,
-					       root->root_key.objectid);
-		if (ret && ret != -EEXIST) {
-			btrfs_abort_transaction(trans, ret);
-			return ret;
-		}
-	}
 	return 0;
 }
 
@@ -3644,7 +3642,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 				goto out;
 			}
 
-			ret = btrfs_truncate(inode);
+			ret = btrfs_truncate(inode, false);
 			if (ret)
 				btrfs_orphan_del(NULL, BTRFS_I(inode));
 		} else {
@@ -4711,7 +4709,6 @@ delete:
 				if (updates) {
 					trans->delayed_ref_updates = 0;
 					ret = btrfs_run_delayed_refs(trans,
-								   fs_info,
 								   updates * 2);
 					if (ret && !err)
 						err = ret;
@@ -4751,8 +4748,7 @@ error:
 		unsigned long updates = trans->delayed_ref_updates;
 		if (updates) {
 			trans->delayed_ref_updates = 0;
-			ret = btrfs_run_delayed_refs(trans, fs_info,
-						     updates * 2);
+			ret = btrfs_run_delayed_refs(trans, updates * 2);
 			if (ret && !err)
 				err = ret;
 		}
@@ -4806,8 +4802,8 @@ again:
 	page = find_or_create_page(mapping, index, mask);
 	if (!page) {
 		btrfs_delalloc_release_space(inode, data_reserved,
-					     block_start, blocksize);
-		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+					     block_start, blocksize, true);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -4874,8 +4870,8 @@ again:
 out_unlock:
 	if (ret)
 		btrfs_delalloc_release_space(inode, data_reserved, block_start,
-					     blocksize);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
+					     blocksize, true);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
 	unlock_page(page);
 	put_page(page);
 out:
@@ -5130,7 +5126,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 		inode_dio_wait(inode);
 		btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
 
-		ret = btrfs_truncate(inode);
+		ret = btrfs_truncate(inode, newsize == oldsize);
 		if (ret && inode->i_nlink) {
 			int err;
 
@@ -5466,7 +5462,8 @@ no_delete:
 
 /*
  * this returns the key found in the dir entry in the location pointer.
- * If no dir entries were found, location->objectid is 0.
+ * If no dir entries were found, returns -ENOENT.
+ * If found a corrupted location in dir entry, returns -EUCLEAN.
  */
 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
 			       struct btrfs_key *location)
@@ -5484,27 +5481,27 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
 
 	di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
 			name, namelen, 0);
-	if (IS_ERR(di))
+	if (!di) {
+		ret = -ENOENT;
+		goto out;
+	}
+	if (IS_ERR(di)) {
 		ret = PTR_ERR(di);
-
-	if (IS_ERR_OR_NULL(di))
-		goto out_err;
+		goto out;
+	}
 
 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
 	if (location->type != BTRFS_INODE_ITEM_KEY &&
 	    location->type != BTRFS_ROOT_ITEM_KEY) {
+		ret = -EUCLEAN;
 		btrfs_warn(root->fs_info,
 "%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
 			   __func__, name, btrfs_ino(BTRFS_I(dir)),
 			   location->objectid, location->type, location->offset);
-		goto out_err;
 	}
 out:
 	btrfs_free_path(path);
 	return ret;
-out_err:
-	location->objectid = 0;
-	goto out;
 }
 
 /*
@@ -5807,9 +5804,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	if (ret < 0)
 		return ERR_PTR(ret);
 
-	if (location.objectid == 0)
-		return ERR_PTR(-ENOENT);
-
 	if (location.type == BTRFS_INODE_ITEM_KEY) {
 		inode = btrfs_iget(dir->i_sb, &location, root, NULL);
 		return inode;
@@ -7443,76 +7437,6 @@ out:
 	return ret;
 }
 
-bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
-{
-	struct radix_tree_root *root = &inode->i_mapping->page_tree;
-	bool found = false;
-	void **pagep = NULL;
-	struct page *page = NULL;
-	unsigned long start_idx;
-	unsigned long end_idx;
-
-	start_idx = start >> PAGE_SHIFT;
-
-	/*
-	 * end is the last byte in the last page.  end == start is legal
-	 */
-	end_idx = end >> PAGE_SHIFT;
-
-	rcu_read_lock();
-
-	/* Most of the code in this while loop is lifted from
-	 * find_get_page.  It's been modified to begin searching from a
-	 * page and return just the first page found in that range.  If the
-	 * found idx is less than or equal to the end idx then we know that
-	 * a page exists.  If no pages are found or if those pages are
-	 * outside of the range then we're fine (yay!) */
-	while (page == NULL &&
-	       radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
-		page = radix_tree_deref_slot(pagep);
-		if (unlikely(!page))
-			break;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				page = NULL;
-				continue;
-			}
-			/*
-			 * Otherwise, shmem/tmpfs must be storing a swap entry
-			 * here as an exceptional entry: so return it without
-			 * attempting to raise page count.
-			 */
-			page = NULL;
-			break; /* TODO: Is this relevant for this use case? */
-		}
-
-		if (!page_cache_get_speculative(page)) {
-			page = NULL;
-			continue;
-		}
-
-		/*
-		 * Has the page moved?
-		 * This is part of the lockless pagecache protocol. See
-		 * include/linux/pagemap.h for details.
-		 */
-		if (unlikely(page != *pagep)) {
-			put_page(page);
-			page = NULL;
-		}
-	}
-
-	if (page) {
-		if (page->index <= end_idx)
-			found = true;
-		put_page(page);
-	}
-
-	rcu_read_unlock();
-	return found;
-}
-
 static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			      struct extent_state **cached_state, int writing)
 {
@@ -7538,8 +7462,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 		 * get stale data.
 		 */
 		if (!ordered &&
-		    (!writing ||
-		     !btrfs_page_exists_in_range(inode, lockstart, lockend)))
+		    (!writing || !filemap_range_has_page(inode->i_mapping,
+							 lockstart, lockend)))
 			break;
 
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
@@ -8270,9 +8194,8 @@ static void btrfs_endio_direct_write(struct bio *bio)
 	bio_put(bio);
 }
 
-static blk_status_t __btrfs_submit_bio_start_direct_io(void *private_data,
-				    struct bio *bio, int mirror_num,
-				    unsigned long bio_flags, u64 offset)
+static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
+				    struct bio *bio, u64 offset)
 {
 	struct inode *inode = private_data;
 	blk_status_t ret;
@@ -8298,13 +8221,13 @@ static void btrfs_end_dio_bio(struct bio *bio)
 		err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
 
 	if (err) {
-		dip->errors = 1;
-
 		/*
-		 * before atomic variable goto zero, we must make sure
-		 * dip->errors is perceived to be set.
+		 * We want to perceive the errors flag being set before
+		 * decrementing the reference count. We don't need a barrier
+		 * since atomic operations with a return value are fully
+		 * ordered as per atomic_t.txt
 		 */
-		smp_mb__before_atomic();
+		dip->errors = 1;
 	}
 
 	/* if there are more bios still pending for this dio, just exit */
@@ -8352,9 +8275,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
 	return 0;
 }
 
-static inline blk_status_t
-__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
-		       int async_submit)
+static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
+		struct inode *inode, u64 file_offset, int async_submit)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_dio_private *dip = bio->bi_private;
@@ -8377,8 +8299,8 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
 	if (write && async_submit) {
 		ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
 					  file_offset, inode,
-					  __btrfs_submit_bio_start_direct_io,
-					  __btrfs_submit_bio_done);
+					  btrfs_submit_bio_start_direct_io,
+					  btrfs_submit_bio_done);
 		goto err;
 	} else if (write) {
 		/*
@@ -8464,7 +8386,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
 		 */
 		atomic_inc(&dip->pending_bios);
 
-		status = __btrfs_submit_dio_bio(bio, inode, file_offset,
+		status = btrfs_submit_dio_bio(bio, inode, file_offset,
 						async_submit);
 		if (status) {
 			bio_put(bio);
@@ -8484,7 +8406,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
 	} while (submit_len > 0);
 
 submit:
-	status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
+	status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
 	if (!status)
 		return 0;
 
@@ -8492,10 +8414,11 @@ submit:
 out_err:
 	dip->errors = 1;
 	/*
-	 * before atomic variable goto zero, we must
-	 * make sure dip->errors is perceived to be set.
+	 * Before atomic variable goto zero, we must  make sure dip->errors is
+	 * perceived to be set. This ordering is ensured by the fact that an
+	 * atomic operations with a return value are fully ordered as per
+	 * atomic_t.txt
 	 */
-	smp_mb__before_atomic();
 	if (atomic_dec_and_test(&dip->pending_bios))
 		bio_io_error(dip->orig_bio);
 
@@ -8713,7 +8636,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		if (ret < 0 && ret != -EIOCBQUEUED) {
 			if (dio_data.reserve)
 				btrfs_delalloc_release_space(inode, data_reserved,
-					offset, dio_data.reserve);
+					offset, dio_data.reserve, true);
 			/*
 			 * On error we might have left some ordered extents
 			 * without submitting corresponding bios for them, so
@@ -8729,8 +8652,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 					false);
 		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode, data_reserved,
-					offset, count - (size_t)ret);
-		btrfs_delalloc_release_extents(BTRFS_I(inode), count);
+					offset, count - (size_t)ret, true);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
 	}
 out:
 	if (wakeup)
@@ -9045,7 +8968,8 @@ again:
 		if (reserved_space < PAGE_SIZE) {
 			end = page_start + reserved_space - 1;
 			btrfs_delalloc_release_space(inode, data_reserved,
-					page_start, PAGE_SIZE - reserved_space);
+					page_start, PAGE_SIZE - reserved_space,
+					true);
 		}
 	}
 
@@ -9095,23 +9019,23 @@ again:
 
 out_unlock:
 	if (!ret) {
-		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
 		sb_end_pagefault(inode->i_sb);
 		extent_changeset_free(data_reserved);
 		return VM_FAULT_LOCKED;
 	}
 	unlock_page(page);
 out:
-	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
 	btrfs_delalloc_release_space(inode, data_reserved, page_start,
-				     reserved_space);
+				     reserved_space, (ret != 0));
 out_noreserve:
 	sb_end_pagefault(inode->i_sb);
 	extent_changeset_free(data_reserved);
 	return ret;
 }
 
-static int btrfs_truncate(struct inode *inode)
+static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -9122,10 +9046,12 @@ static int btrfs_truncate(struct inode *inode)
 	u64 mask = fs_info->sectorsize - 1;
 	u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
 
-	ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
-				       (u64)-1);
-	if (ret)
-		return ret;
+	if (!skip_writeback) {
+		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
+					       (u64)-1);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Yes ladies and gentlemen, this is indeed ugly.  The fact is we have
@@ -9335,7 +9261,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 	ei->dir_index = 0;
 	ei->last_unlink_trans = 0;
 	ei->last_log_commit = 0;
-	ei->delayed_iput_count = 0;
 
 	spin_lock_init(&ei->lock);
 	ei->outstanding_extents = 0;
@@ -9455,7 +9380,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-void btrfs_destroy_cachep(void)
+void __cold btrfs_destroy_cachep(void)
 {
 	/*
 	 * Make sure all delayed rcu free inodes are flushed before we
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3278ae592a2c..b2db3988813f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -106,7 +106,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 		       int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
-static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
+static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
 {
 	if (S_ISDIR(mode))
 		return flags;
@@ -1197,7 +1197,7 @@ again:
 		spin_unlock(&BTRFS_I(inode)->lock);
 		btrfs_delalloc_release_space(inode, data_reserved,
 				start_index << PAGE_SHIFT,
-				(page_cnt - i_done) << PAGE_SHIFT);
+				(page_cnt - i_done) << PAGE_SHIFT, true);
 	}
 
 
@@ -1215,7 +1215,8 @@ again:
 		unlock_page(pages[i]);
 		put_page(pages[i]);
 	}
-	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+				       false);
 	extent_changeset_free(data_reserved);
 	return i_done;
 out:
@@ -1225,8 +1226,9 @@ out:
 	}
 	btrfs_delalloc_release_space(inode, data_reserved,
 			start_index << PAGE_SHIFT,
-			page_cnt << PAGE_SHIFT);
-	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
+			page_cnt << PAGE_SHIFT, true);
+	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
+				       true);
 	extent_changeset_free(data_reserved);
 	return ret;
 
@@ -2600,7 +2602,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 			range->len = (u64)-1;
 		}
 		ret = btrfs_defrag_file(file_inode(file), file,
-					range, 0, 0);
+					range, BTRFS_OLDEST_GENERATION, 0);
 		if (ret > 0)
 			ret = 0;
 		kfree(range);
@@ -3936,73 +3938,6 @@ int btrfs_clone_file_range(struct file *src_file, loff_t off,
 	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
 }
 
-/*
- * there are many ways the trans_start and trans_end ioctls can lead
- * to deadlocks.  They should only be used by applications that
- * basically own the machine, and have a very in depth understanding
- * of all the possible deadlocks and enospc problems.
- */
-static long btrfs_ioctl_trans_start(struct file *file)
-{
-	struct inode *inode = file_inode(file);
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
-	struct btrfs_file_private *private;
-	int ret;
-	static bool warned = false;
-
-	ret = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
-		goto out;
-
-	if (!warned) {
-		btrfs_warn(fs_info,
-			"Userspace transaction mechanism is considered "
-			"deprecated and slated to be removed in 4.17. "
-			"If you have a valid use case please "
-			"speak up on the mailing list");
-		WARN_ON(1);
-		warned = true;
-	}
-
-	ret = -EINPROGRESS;
-	private = file->private_data;
-	if (private && private->trans)
-		goto out;
-	if (!private) {
-		private = kzalloc(sizeof(struct btrfs_file_private),
-				  GFP_KERNEL);
-		if (!private)
-			return -ENOMEM;
-		file->private_data = private;
-	}
-
-	ret = -EROFS;
-	if (btrfs_root_readonly(root))
-		goto out;
-
-	ret = mnt_want_write_file(file);
-	if (ret)
-		goto out;
-
-	atomic_inc(&fs_info->open_ioctl_trans);
-
-	ret = -ENOMEM;
-	trans = btrfs_start_ioctl_transaction(root);
-	if (IS_ERR(trans))
-		goto out_drop;
-
-	private->trans = trans;
-	return 0;
-
-out_drop:
-	atomic_dec(&fs_info->open_ioctl_trans);
-	mnt_drop_write_file(file);
-out:
-	return ret;
-}
-
 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 {
 	struct inode *inode = file_inode(file);
@@ -4244,30 +4179,6 @@ out:
 	return ret;
 }
 
-/*
- * there are many ways the trans_start and trans_end ioctls can lead
- * to deadlocks.  They should only be used by applications that
- * basically own the machine, and have a very in depth understanding
- * of all the possible deadlocks and enospc problems.
- */
-long btrfs_ioctl_trans_end(struct file *file)
-{
-	struct inode *inode = file_inode(file);
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_file_private *private = file->private_data;
-
-	if (!private || !private->trans)
-		return -EINVAL;
-
-	btrfs_end_transaction(private->trans);
-	private->trans = NULL;
-
-	atomic_dec(&root->fs_info->open_ioctl_trans);
-
-	mnt_drop_write_file(file);
-	return 0;
-}
-
 static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
 					    void __user *argp)
 {
@@ -4429,7 +4340,8 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
 		ret = 0;
 		break;
 	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
-		ret = btrfs_dev_replace_cancel(fs_info, p);
+		p->result = btrfs_dev_replace_cancel(fs_info);
+		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;
@@ -5138,10 +5050,17 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
 	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
 				       BTRFS_UUID_SIZE);
 	if (received_uuid_changed &&
-	    !btrfs_is_empty_uuid(root_item->received_uuid))
-		btrfs_uuid_tree_rem(trans, fs_info, root_item->received_uuid,
-				    BTRFS_UUID_KEY_RECEIVED_SUBVOL,
-				    root->root_key.objectid);
+	    !btrfs_is_empty_uuid(root_item->received_uuid)) {
+		ret = btrfs_uuid_tree_rem(trans, fs_info,
+					  root_item->received_uuid,
+					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+					  root->root_key.objectid);
+		if (ret && ret != -ENOENT) {
+		        btrfs_abort_transaction(trans, ret);
+		        btrfs_end_transaction(trans);
+		        goto out;
+		}
+	}
 	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
 	btrfs_set_root_stransid(root_item, sa->stransid);
 	btrfs_set_root_rtransid(root_item, sa->rtransid);
@@ -5574,10 +5493,6 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_dev_info(fs_info, argp);
 	case BTRFS_IOC_BALANCE:
 		return btrfs_ioctl_balance(file, NULL);
-	case BTRFS_IOC_TRANS_START:
-		return btrfs_ioctl_trans_start(file);
-	case BTRFS_IOC_TRANS_END:
-		return btrfs_ioctl_trans_end(file);
 	case BTRFS_IOC_TREE_SEARCH:
 		return btrfs_ioctl_tree_search(file, argp);
 	case BTRFS_IOC_TREE_SEARCH_V2:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index d13128c70ddd..621083f8932c 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -290,7 +290,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
 		/*
 		 * Make sure counter is updated before we wake up waiters.
 		 */
-		smp_mb();
+		smp_mb__after_atomic();
 		if (waitqueue_active(&eb->write_lock_wq))
 			wake_up(&eb->write_lock_wq);
 	} else {
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 6c7f18cd3b61..1c7f7f70caf4 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -382,14 +382,12 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	size_t in_len;
 	size_t out_len;
-	size_t tot_len;
 	int ret = 0;
 	char *kaddr;
 	unsigned long bytes;
 
 	BUG_ON(srclen < LZO_LEN);
 
-	tot_len = read_compress_length(data_in);
 	data_in += LZO_LEN;
 
 	in_len = read_compress_length(data_in);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5b311aeddcc8..661cc3db0c7c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -610,7 +610,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	btrfs_mod_outstanding_extents(btrfs_inode, -1);
 	spin_unlock(&btrfs_inode->lock);
 	if (root != fs_info->tree_root)
-		btrfs_delalloc_release_metadata(btrfs_inode, entry->len);
+		btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
 
 	tree = &btrfs_inode->ordered_tree;
 	spin_lock_irq(&tree->lock);
@@ -1154,7 +1154,7 @@ int __init ordered_data_init(void)
 	return 0;
 }
 
-void ordered_data_exit(void)
+void __cold ordered_data_exit(void)
 {
 	kmem_cache_destroy(btrfs_ordered_extent_cache);
 }
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 56c4c0ee6381..4a1672a13ba6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -151,7 +151,9 @@ static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
 					 unsigned long bytes)
 {
 	int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
-	return sizeof(struct btrfs_ordered_sum) + num_sectors * sizeof(u32);
+	int csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+	return sizeof(struct btrfs_ordered_sum) + num_sectors * csum_size;
 }
 
 static inline void
@@ -215,5 +217,5 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *log, u64 transid);
 void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
 int __init ordered_data_init(void);
-void ordered_data_exit(void);
+void __cold ordered_data_exit(void);
 #endif
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 569205e651c7..4a8770485f77 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -365,9 +365,13 @@ void btrfs_print_tree(struct extent_buffer *c)
 		       btrfs_node_blockptr(c, i));
 	}
 	for (i = 0; i < nr; i++) {
-		struct extent_buffer *next = read_tree_block(fs_info,
-					btrfs_node_blockptr(c, i),
-					btrfs_node_ptr_generation(c, i));
+		struct btrfs_key first_key;
+		struct extent_buffer *next;
+
+		btrfs_node_key_to_cpu(c, &first_key, i);
+		next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
+				       btrfs_node_ptr_generation(c, i),
+				       level - 1, &first_key);
 		if (IS_ERR(next)) {
 			continue;
 		} else if (!extent_buffer_uptodate(next)) {
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index b30a056963ab..5859f7d3cf3e 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -19,8 +19,8 @@
 #include <linux/hashtable.h>
 #include "props.h"
 #include "btrfs_inode.h"
-#include "hash.h"
 #include "transaction.h"
+#include "ctree.h"
 #include "xattr.h"
 #include "compression.h"
 
@@ -116,7 +116,7 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
 		return -EINVAL;
 
 	if (value_len == 0) {
-		ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+		ret = btrfs_setxattr(trans, inode, handler->xattr_name,
 				       NULL, 0, flags);
 		if (ret)
 			return ret;
@@ -130,13 +130,13 @@ static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
 	ret = handler->validate(value, value_len);
 	if (ret)
 		return ret;
-	ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
+	ret = btrfs_setxattr(trans, inode, handler->xattr_name,
 			       value, value_len, flags);
 	if (ret)
 		return ret;
 	ret = handler->apply(inode, value, value_len);
 	if (ret) {
-		__btrfs_setxattr(trans, inode, handler->xattr_name,
+		btrfs_setxattr(trans, inode, handler->xattr_name,
 				 NULL, 0, flags);
 		return ret;
 	}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aa259d6986e1..f583f13ff26e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -47,6 +47,82 @@
  *  - check all ioctl parameters
  */
 
+/*
+ * Helpers to access qgroup reservation
+ *
+ * Callers should ensure the lock context and type are valid
+ */
+
+static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
+{
+	u64 ret = 0;
+	int i;
+
+	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+		ret += qgroup->rsv.values[i];
+
+	return ret;
+}
+
+#ifdef CONFIG_BTRFS_DEBUG
+static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
+{
+	if (type == BTRFS_QGROUP_RSV_DATA)
+		return "data";
+	if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
+		return "meta_pertrans";
+	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
+		return "meta_prealloc";
+	return NULL;
+}
+#endif
+
+static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
+			   struct btrfs_qgroup *qgroup, u64 num_bytes,
+			   enum btrfs_qgroup_rsv_type type)
+{
+	trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
+	qgroup->rsv.values[type] += num_bytes;
+}
+
+static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
+			       struct btrfs_qgroup *qgroup, u64 num_bytes,
+			       enum btrfs_qgroup_rsv_type type)
+{
+	trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
+	if (qgroup->rsv.values[type] >= num_bytes) {
+		qgroup->rsv.values[type] -= num_bytes;
+		return;
+	}
+#ifdef CONFIG_BTRFS_DEBUG
+	WARN_RATELIMIT(1,
+		"qgroup %llu %s reserved space underflow, have %llu to free %llu",
+		qgroup->qgroupid, qgroup_rsv_type_str(type),
+		qgroup->rsv.values[type], num_bytes);
+#endif
+	qgroup->rsv.values[type] = 0;
+}
+
+static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
+				     struct btrfs_qgroup *dest,
+				     struct btrfs_qgroup *src)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+		qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
+}
+
+static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
+					 struct btrfs_qgroup *dest,
+					  struct btrfs_qgroup *src)
+{
+	int i;
+
+	for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+		qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
+}
+
 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
 					   int mod)
 {
@@ -826,10 +902,8 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	int slot;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
-	if (fs_info->quota_root) {
-		set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
+	if (fs_info->quota_root)
 		goto out;
-	}
 
 	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
 	if (!fs_info->qgroup_ulist) {
@@ -923,8 +997,15 @@ out_add_root:
 	}
 	spin_lock(&fs_info->qgroup_lock);
 	fs_info->quota_root = quota_root;
-	set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags);
+	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 	spin_unlock(&fs_info->qgroup_lock);
+	ret = qgroup_rescan_init(fs_info, 0, 1);
+	if (!ret) {
+	        qgroup_rescan_zero_tracking(fs_info);
+	        btrfs_queue_work(fs_info->qgroup_rescan_workers,
+	                         &fs_info->qgroup_rescan_work);
+	}
+
 out_free_path:
 	btrfs_free_path(path);
 out_free_root:
@@ -991,33 +1072,29 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
-static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
-				      struct btrfs_qgroup *qgroup,
-				      u64 num_bytes)
-{
-#ifdef CONFIG_BTRFS_DEBUG
-	WARN_ON(qgroup->reserved < num_bytes);
-	btrfs_debug(fs_info,
-		"qgroup %llu reserved space underflow, have: %llu, to free: %llu",
-		qgroup->qgroupid, qgroup->reserved, num_bytes);
-#endif
-	qgroup->reserved = 0;
-}
 /*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their reference and
- * exclusive counts adjusted.
+ * The easy accounting, we're updating qgroup relationship whose child qgroup
+ * only has exclusive extents.
+ *
+ * In this case, all exclsuive extents will also be exlusive for parent, so
+ * excl/rfer just get added/removed.
+ *
+ * So is qgroup reservation space, which should also be added/removed to
+ * parent.
+ * Or when child tries to release reservation space, parent will underflow its
+ * reservation (for relationship adding case).
  *
  * Caller should hold fs_info->qgroup_lock.
  */
 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 				    struct ulist *tmp, u64 ref_root,
-				    u64 num_bytes, int sign)
+				    struct btrfs_qgroup *src, int sign)
 {
 	struct btrfs_qgroup *qgroup;
 	struct btrfs_qgroup_list *glist;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
+	u64 num_bytes = src->excl;
 	int ret = 0;
 
 	qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -1030,13 +1107,11 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
 	qgroup->excl += sign * num_bytes;
 	qgroup->excl_cmpr += sign * num_bytes;
-	if (sign > 0) {
-		trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
-		if (qgroup->reserved < num_bytes)
-			report_reserved_underflow(fs_info, qgroup, num_bytes);
-		else
-			qgroup->reserved -= num_bytes;
-	}
+
+	if (sign > 0)
+		qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+	else
+		qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
 
 	qgroup_dirty(fs_info, qgroup);
 
@@ -1056,15 +1131,10 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
 		qgroup->rfer_cmpr += sign * num_bytes;
 		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
 		qgroup->excl += sign * num_bytes;
-		if (sign > 0) {
-			trace_qgroup_update_reserve(fs_info, qgroup,
-						    -(s64)num_bytes);
-			if (qgroup->reserved < num_bytes)
-				report_reserved_underflow(fs_info, qgroup,
-							  num_bytes);
-			else
-				qgroup->reserved -= num_bytes;
-		}
+		if (sign > 0)
+			qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+		else
+			qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
 		qgroup->excl_cmpr += sign * num_bytes;
 		qgroup_dirty(fs_info, qgroup);
 
@@ -1107,7 +1177,7 @@ static int quick_update_accounting(struct btrfs_fs_info *fs_info,
 	if (qgroup->excl == qgroup->rfer) {
 		ret = 0;
 		err = __qgroup_excl_accounting(fs_info, tmp, dst,
-					       qgroup->excl, sign);
+					       qgroup, sign);
 		if (err < 0) {
 			ret = err;
 			goto out;
@@ -1414,7 +1484,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
 	struct btrfs_qgroup_extent_record *entry;
 	u64 bytenr = record->bytenr;
 
-	assert_spin_locked(&delayed_refs->lock);
+	lockdep_assert_held(&delayed_refs->lock);
 	trace_btrfs_qgroup_trace_extent(fs_info, record);
 
 	while (*p) {
@@ -1614,7 +1684,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (!extent_buffer_uptodate(root_eb)) {
-		ret = btrfs_read_buffer(root_eb, root_gen);
+		ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
 		if (ret)
 			goto out;
 	}
@@ -1645,6 +1715,7 @@ walk_down:
 	level = root_level;
 	while (level >= 0) {
 		if (path->nodes[level] == NULL) {
+			struct btrfs_key first_key;
 			int parent_slot;
 			u64 child_gen;
 			u64 child_bytenr;
@@ -1657,8 +1728,10 @@ walk_down:
 			parent_slot = path->slots[level + 1];
 			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
 			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
 
-			eb = read_tree_block(fs_info, child_bytenr, child_gen);
+			eb = read_tree_block(fs_info, child_bytenr, child_gen,
+					     level, &first_key);
 			if (IS_ERR(eb)) {
 				ret = PTR_ERR(eb);
 				goto out;
@@ -2009,9 +2082,9 @@ out_free:
 	return ret;
 }
 
-int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_qgroup_extent_record *record;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct ulist *new_roots = NULL;
@@ -2080,17 +2153,9 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *quota_root = fs_info->quota_root;
 	int ret = 0;
-	int start_rescan_worker = 0;
 
 	if (!quota_root)
-		goto out;
-
-	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-	    test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
-		start_rescan_worker = 1;
-
-	if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
-		set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+		return ret;
 
 	spin_lock(&fs_info->qgroup_lock);
 	while (!list_empty(&fs_info->dirty_qgroups)) {
@@ -2119,18 +2184,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 	if (ret)
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
-	if (!ret && start_rescan_worker) {
-		ret = qgroup_rescan_init(fs_info, 0, 1);
-		if (!ret) {
-			qgroup_rescan_zero_tracking(fs_info);
-			btrfs_queue_work(fs_info->qgroup_rescan_workers,
-					 &fs_info->qgroup_rescan_work);
-		}
-		ret = 0;
-	}
-
-out:
-
 	return ret;
 }
 
@@ -2338,24 +2391,24 @@ out:
 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
 {
 	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-	    qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+	    qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
 		return false;
 
 	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-	    qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+	    qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
 		return false;
 
 	return true;
 }
 
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
+			  enum btrfs_qgroup_rsv_type type)
 {
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 ref_root = root->root_key.objectid;
 	int ret = 0;
-	int retried = 0;
 	struct ulist_node *unode;
 	struct ulist_iterator uiter;
 
@@ -2369,7 +2422,6 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
 	    capable(CAP_SYS_RESOURCE))
 		enforce = false;
 
-retry:
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
@@ -2385,7 +2437,7 @@ retry:
 	 */
 	ulist_reinit(fs_info->qgroup_ulist);
 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
-			(uintptr_t)qgroup, GFP_ATOMIC);
+			qgroup_to_aux(qgroup), GFP_ATOMIC);
 	if (ret < 0)
 		goto out;
 	ULIST_ITER_INIT(&uiter);
@@ -2396,27 +2448,6 @@ retry:
 		qg = unode_aux_to_qgroup(unode);
 
 		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
-			/*
-			 * Commit the tree and retry, since we may have
-			 * deletions which would free up space.
-			 */
-			if (!retried && qg->reserved > 0) {
-				struct btrfs_trans_handle *trans;
-
-				spin_unlock(&fs_info->qgroup_lock);
-				ret = btrfs_start_delalloc_inodes(root, 0);
-				if (ret)
-					return ret;
-				btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
-				trans = btrfs_join_transaction(root);
-				if (IS_ERR(trans))
-					return PTR_ERR(trans);
-				ret = btrfs_commit_transaction(trans);
-				if (ret)
-					return ret;
-				retried++;
-				goto retry;
-			}
 			ret = -EDQUOT;
 			goto out;
 		}
@@ -2424,7 +2455,7 @@ retry:
 		list_for_each_entry(glist, &qg->groups, next_group) {
 			ret = ulist_add(fs_info->qgroup_ulist,
 					glist->group->qgroupid,
-					(uintptr_t)glist->group, GFP_ATOMIC);
+					qgroup_to_aux(glist->group), GFP_ATOMIC);
 			if (ret < 0)
 				goto out;
 		}
@@ -2439,8 +2470,8 @@ retry:
 
 		qg = unode_aux_to_qgroup(unode);
 
-		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
-		qg->reserved += num_bytes;
+		trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
+		qgroup_rsv_add(fs_info, qg, num_bytes, type);
 	}
 
 out:
@@ -2448,8 +2479,18 @@ out:
 	return ret;
 }
 
+/*
+ * Free @num_bytes of reserved space with @type for qgroup.  (Normally level 0
+ * qgroup).
+ *
+ * Will handle all higher level qgroup too.
+ *
+ * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
+ * This special case is only used for META_PERTRANS type.
+ */
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
-			       u64 ref_root, u64 num_bytes)
+			       u64 ref_root, u64 num_bytes,
+			       enum btrfs_qgroup_rsv_type type)
 {
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
@@ -2463,6 +2504,10 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 	if (num_bytes == 0)
 		return;
 
+	if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
+		WARN(1, "%s: Invalid type to free", __func__);
+		return;
+	}
 	spin_lock(&fs_info->qgroup_lock);
 
 	quota_root = fs_info->quota_root;
@@ -2473,9 +2518,16 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 	if (!qgroup)
 		goto out;
 
+	if (num_bytes == (u64)-1)
+		/*
+		 * We're freeing all pertrans rsv, get reserved value from
+		 * level 0 qgroup as real num_bytes to free.
+		 */
+		num_bytes = qgroup->rsv.values[type];
+
 	ulist_reinit(fs_info->qgroup_ulist);
 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
-			(uintptr_t)qgroup, GFP_ATOMIC);
+			qgroup_to_aux(qgroup), GFP_ATOMIC);
 	if (ret < 0)
 		goto out;
 	ULIST_ITER_INIT(&uiter);
@@ -2485,16 +2537,13 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 
 		qg = unode_aux_to_qgroup(unode);
 
-		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
-		if (qg->reserved < num_bytes)
-			report_reserved_underflow(fs_info, qg, num_bytes);
-		else
-			qg->reserved -= num_bytes;
+		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
+		qgroup_rsv_release(fs_info, qg, num_bytes, type);
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
 			ret = ulist_add(fs_info->qgroup_ulist,
 					glist->group->qgroupid,
-					(uintptr_t)glist->group, GFP_ATOMIC);
+					qgroup_to_aux(glist->group), GFP_ATOMIC);
 			if (ret < 0)
 				goto out;
 		}
@@ -2877,7 +2926,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
 					to_reserve, QGROUP_RESERVE);
 	if (ret < 0)
 		goto cleanup;
-	ret = qgroup_reserve(root, to_reserve, true);
+	ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
 	if (ret < 0)
 		goto cleanup;
 
@@ -2940,7 +2989,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
 			goto out;
 		freed += changeset.bytes_changed;
 	}
-	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
+	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
+				  BTRFS_QGROUP_RSV_DATA);
 	ret = freed;
 out:
 	extent_changeset_release(&changeset);
@@ -2972,7 +3022,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
 	if (free)
 		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
 				BTRFS_I(inode)->root->objectid,
-				changeset.bytes_changed);
+				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
 	ret = changeset.bytes_changed;
 out:
 	extent_changeset_release(&changeset);
@@ -3017,8 +3067,48 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
 	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
 }
 
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-			      bool enforce)
+static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
+			      enum btrfs_qgroup_rsv_type type)
+{
+	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
+	    type != BTRFS_QGROUP_RSV_META_PERTRANS)
+		return;
+	if (num_bytes == 0)
+		return;
+
+	spin_lock(&root->qgroup_meta_rsv_lock);
+	if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
+		root->qgroup_meta_rsv_prealloc += num_bytes;
+	else
+		root->qgroup_meta_rsv_pertrans += num_bytes;
+	spin_unlock(&root->qgroup_meta_rsv_lock);
+}
+
+static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
+			     enum btrfs_qgroup_rsv_type type)
+{
+	if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
+	    type != BTRFS_QGROUP_RSV_META_PERTRANS)
+		return 0;
+	if (num_bytes == 0)
+		return 0;
+
+	spin_lock(&root->qgroup_meta_rsv_lock);
+	if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
+		num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
+				  num_bytes);
+		root->qgroup_meta_rsv_prealloc -= num_bytes;
+	} else {
+		num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
+				  num_bytes);
+		root->qgroup_meta_rsv_pertrans -= num_bytes;
+	}
+	spin_unlock(&root->qgroup_meta_rsv_lock);
+	return num_bytes;
+}
+
+int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+				enum btrfs_qgroup_rsv_type type, bool enforce)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
@@ -3028,31 +3118,39 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 		return 0;
 
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-	trace_qgroup_meta_reserve(root, (s64)num_bytes);
-	ret = qgroup_reserve(root, num_bytes, enforce);
+	trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
+	ret = qgroup_reserve(root, num_bytes, enforce, type);
 	if (ret < 0)
 		return ret;
-	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
+	/*
+	 * Record what we have reserved into root.
+	 *
+	 * To avoid quota disabled->enabled underflow.
+	 * In that case, we may try to free space we haven't reserved
+	 * (since quota was disabled), so record what we reserved into root.
+	 * And ensure later release won't underflow this number.
+	 */
+	add_root_meta_rsv(root, num_bytes, type);
 	return ret;
 }
 
-void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
+void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	u64 reserved;
 
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
 	    !is_fstree(root->objectid))
 		return;
 
-	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
-	if (reserved == 0)
-		return;
-	trace_qgroup_meta_reserve(root, -(s64)reserved);
-	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
+	/* TODO: Update trace point to handle such free */
+	trace_qgroup_meta_free_all_pertrans(root);
+	/* Special value -1 means to free all reserved space */
+	btrfs_qgroup_free_refroot(fs_info, root->objectid, (u64)-1,
+				  BTRFS_QGROUP_RSV_META_PERTRANS);
 }
 
-void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
+void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
+			      enum btrfs_qgroup_rsv_type type)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 
@@ -3060,11 +3158,75 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
 	    !is_fstree(root->objectid))
 		return;
 
+	/*
+	 * reservation for META_PREALLOC can happen before quota is enabled,
+	 * which can lead to underflow.
+	 * Here ensure we will only free what we really have reserved.
+	 */
+	num_bytes = sub_root_meta_rsv(root, num_bytes, type);
 	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
-	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
-	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
-	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
-	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
+	trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
+	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes, type);
+}
+
+static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
+				int num_bytes)
+{
+	struct btrfs_root *quota_root = fs_info->quota_root;
+	struct btrfs_qgroup *qgroup;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	int ret = 0;
+
+	if (num_bytes == 0)
+		return;
+	if (!quota_root)
+		return;
+
+	spin_lock(&fs_info->qgroup_lock);
+	qgroup = find_qgroup_rb(fs_info, ref_root);
+	if (!qgroup)
+		goto out;
+	ulist_reinit(fs_info->qgroup_ulist);
+	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
+		       qgroup_to_aux(qgroup), GFP_ATOMIC);
+	if (ret < 0)
+		goto out;
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
+		struct btrfs_qgroup *qg;
+		struct btrfs_qgroup_list *glist;
+
+		qg = unode_aux_to_qgroup(unode);
+
+		qgroup_rsv_release(fs_info, qg, num_bytes,
+				BTRFS_QGROUP_RSV_META_PREALLOC);
+		qgroup_rsv_add(fs_info, qg, num_bytes,
+				BTRFS_QGROUP_RSV_META_PERTRANS);
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			ret = ulist_add(fs_info->qgroup_ulist,
+					glist->group->qgroupid,
+					qgroup_to_aux(glist->group), GFP_ATOMIC);
+			if (ret < 0)
+				goto out;
+		}
+	}
+out:
+	spin_unlock(&fs_info->qgroup_lock);
+}
+
+void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
+	    !is_fstree(root->objectid))
+		return;
+	/* Same as btrfs_qgroup_free_meta_prealloc() */
+	num_bytes = sub_root_meta_rsv(root, num_bytes,
+				      BTRFS_QGROUP_RSV_META_PREALLOC);
+	trace_qgroup_meta_convert(root, num_bytes);
+	qgroup_convert_meta(fs_info, root->objectid, num_bytes);
 }
 
 /*
@@ -3092,7 +3254,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
 		}
 		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
 				BTRFS_I(inode)->root->objectid,
-				changeset.bytes_changed);
+				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
 
 	}
 	extent_changeset_release(&changeset);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d9984e87cddf..e63e2d497a8e 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -62,6 +62,48 @@ struct btrfs_qgroup_extent_record {
 };
 
 /*
+ * Qgroup reservation types:
+ *
+ * DATA:
+ *	space reserved for data
+ *
+ * META_PERTRANS:
+ * 	Space reserved for metadata (per-transaction)
+ * 	Due to the fact that qgroup data is only updated at transaction commit
+ * 	time, reserved space for metadata must be kept until transaction
+ * 	commits.
+ * 	Any metadata reserved that are used in btrfs_start_transaction() should
+ * 	be of this type.
+ *
+ * META_PREALLOC:
+ *	There are cases where metadata space is reserved before starting
+ *	transaction, and then btrfs_join_transaction() to get a trans handle.
+ *	Any metadata reserved for such usage should be of this type.
+ *	And after join_transaction() part (or all) of such reservation should
+ *	be converted into META_PERTRANS.
+ */
+enum btrfs_qgroup_rsv_type {
+	BTRFS_QGROUP_RSV_DATA = 0,
+	BTRFS_QGROUP_RSV_META_PERTRANS,
+	BTRFS_QGROUP_RSV_META_PREALLOC,
+	BTRFS_QGROUP_RSV_LAST,
+};
+
+/*
+ * Represents how many bytes we have reserved for this qgroup.
+ *
+ * Each type should have different reservation behavior.
+ * E.g, data follows its io_tree flag modification, while
+ * *currently* meta is just reserve-and-clear during transcation.
+ *
+ * TODO: Add new type for reservation which can survive transaction commit.
+ * Currect metadata reservation behavior is not suitable for such case.
+ */
+struct btrfs_qgroup_rsv {
+	u64 values[BTRFS_QGROUP_RSV_LAST];
+};
+
+/*
  * one struct for each qgroup, organized in fs_info->qgroup_tree.
  */
 struct btrfs_qgroup {
@@ -87,7 +129,7 @@ struct btrfs_qgroup {
 	/*
 	 * reservation tracking
 	 */
-	u64 reserved;
+	struct btrfs_qgroup_rsv rsv;
 
 	/*
 	 * lists
@@ -220,20 +262,21 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
 			    struct btrfs_fs_info *fs_info,
 			    u64 bytenr, u64 num_bytes,
 			    struct ulist *old_roots, struct ulist *new_roots);
-int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 		      struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
 			 struct btrfs_qgroup_inherit *inherit);
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
-			       u64 ref_root, u64 num_bytes);
+			       u64 ref_root, u64 num_bytes,
+			       enum btrfs_qgroup_rsv_type type);
 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
 						 u64 ref_root, u64 num_bytes)
 {
 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
-	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
+	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
+				  BTRFS_QGROUP_RSV_DATA);
 }
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -248,9 +291,54 @@ int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
 int btrfs_qgroup_free_data(struct inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len);
 
-int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-			      bool enforce);
-void btrfs_qgroup_free_meta_all(struct btrfs_root *root);
-void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes);
+int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+				enum btrfs_qgroup_rsv_type type, bool enforce);
+/* Reserve metadata space for pertrans and prealloc type */
+static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
+				int num_bytes, bool enforce)
+{
+	return __btrfs_qgroup_reserve_meta(root, num_bytes,
+			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
+}
+static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
+				int num_bytes, bool enforce)
+{
+	return __btrfs_qgroup_reserve_meta(root, num_bytes,
+			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
+}
+
+void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
+			     enum btrfs_qgroup_rsv_type type);
+
+/* Free per-transaction meta reservation for error handling */
+static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
+						   int num_bytes)
+{
+	__btrfs_qgroup_free_meta(root, num_bytes,
+			BTRFS_QGROUP_RSV_META_PERTRANS);
+}
+
+/* Pre-allocated meta reservation can be freed at need */
+static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
+						   int num_bytes)
+{
+	__btrfs_qgroup_free_meta(root, num_bytes,
+			BTRFS_QGROUP_RSV_META_PREALLOC);
+}
+
+/*
+ * Per-transaction meta reservation should be all freed at transaction commit
+ * time
+ */
+void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
+
+/*
+ * Convert @num_bytes of META_PREALLOCATED reservation to META_PERTRANS.
+ *
+ * This is called when preallocated meta reservation needs to be used.
+ * Normally after btrfs_join_transaction() call.
+ */
+void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
+
 void btrfs_qgroup_check_reserved_leak(struct inode *inode);
 #endif /* __BTRFS_QGROUP__ */
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index fcfc20de2df3..c3a2bc8af675 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1987,7 +1987,13 @@ cleanup:
 	kfree(pointers);
 
 cleanup_io:
-	if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
+	/*
+	 * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
+	 * valid rbio which is consistent with ondisk content, thus such a
+	 * valid rbio can be cached to avoid further disk reads.
+	 */
+	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
 		/*
 		 * - In case of two failures, where rbio->failb != -1:
 		 *
@@ -2009,8 +2015,6 @@ cleanup_io:
 			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
 		rbio_orig_end_io(rbio, err);
-	} else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
-		rbio_orig_end_io(rbio, err);
 	} else if (err == BLK_STS_OK) {
 		rbio->faila = -1;
 		rbio->failb = -1;
@@ -2768,24 +2772,8 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	return rbio;
 }
 
-static void missing_raid56_work(struct btrfs_work *work)
-{
-	struct btrfs_raid_bio *rbio;
-
-	rbio = container_of(work, struct btrfs_raid_bio, work);
-	__raid56_parity_recover(rbio);
-}
-
-static void async_missing_raid56(struct btrfs_raid_bio *rbio)
-{
-	btrfs_init_work(&rbio->work, btrfs_rmw_helper,
-			missing_raid56_work, NULL, NULL);
-
-	btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
 {
 	if (!lock_stripe_add(rbio))
-		async_missing_raid56(rbio);
+		async_read_rebuild(rbio);
 }
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ab852b8e3e37..a52dd12af648 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -395,20 +395,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 		goto error;
 
 	/* insert extent in reada_tree + all per-device trees, all or nothing */
-	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_insert(&fs_info->reada_tree, index, re);
 	if (ret == -EEXIST) {
 		re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
 		re_exist->refcnt++;
 		spin_unlock(&fs_info->reada_lock);
-		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 		radix_tree_preload_end();
 		goto error;
 	}
 	if (ret) {
 		spin_unlock(&fs_info->reada_lock);
-		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 		radix_tree_preload_end();
 		goto error;
 	}
@@ -451,13 +451,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
 			}
 			radix_tree_delete(&fs_info->reada_tree, index);
 			spin_unlock(&fs_info->reada_lock);
-			btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+			btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 			goto error;
 		}
 		have_zone = 1;
 	}
 	spin_unlock(&fs_info->reada_lock);
-	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 
 	if (!have_zone)
 		goto error;
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 171f3cce30e6..35fab67dcbe8 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -579,11 +579,16 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
 
 	while (level >= 0) {
 		if (level) {
+			struct btrfs_key first_key;
+
 			block_bytenr = btrfs_node_blockptr(path->nodes[level],
 							   path->slots[level]);
 			gen = btrfs_node_ptr_generation(path->nodes[level],
 							path->slots[level]);
-			eb = read_tree_block(fs_info, block_bytenr, gen);
+			btrfs_node_key_to_cpu(path->nodes[level], &first_key,
+					      path->slots[level]);
+			eb = read_tree_block(fs_info, block_bytenr, gen,
+					     level - 1, &first_key);
 			if (IS_ERR(eb))
 				return PTR_ERR(eb);
 			if (!extent_buffer_uptodate(eb)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cd2298d185dd..4874c09f6d3c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1839,6 +1839,8 @@ again:
 
 	parent = eb;
 	while (1) {
+		struct btrfs_key first_key;
+
 		level = btrfs_header_level(parent);
 		BUG_ON(level < lowest_level);
 
@@ -1852,6 +1854,7 @@ again:
 		old_bytenr = btrfs_node_blockptr(parent, slot);
 		blocksize = fs_info->nodesize;
 		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
+		btrfs_node_key_to_cpu(parent, &key, slot);
 
 		if (level <= max_level) {
 			eb = path->nodes[level];
@@ -1876,7 +1879,8 @@ again:
 				break;
 			}
 
-			eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen);
+			eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
+					     level - 1, &first_key);
 			if (IS_ERR(eb)) {
 				ret = PTR_ERR(eb);
 				break;
@@ -2036,6 +2040,8 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
 	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 
 	for (i = *level; i > 0; i--) {
+		struct btrfs_key first_key;
+
 		eb = path->nodes[i];
 		nritems = btrfs_header_nritems(eb);
 		while (path->slots[i] < nritems) {
@@ -2056,7 +2062,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
 		}
 
 		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
-		eb = read_tree_block(fs_info, bytenr, ptr_gen);
+		btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
+		eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
+				     &first_key);
 		if (IS_ERR(eb)) {
 			return PTR_ERR(eb);
 		} else if (!extent_buffer_uptodate(eb)) {
@@ -2714,6 +2722,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 	path->lowest_level = node->level + 1;
 	rc->backref_cache.path[node->level] = node;
 	list_for_each_entry(edge, &node->upper, list[LOWER]) {
+		struct btrfs_key first_key;
+
 		cond_resched();
 
 		upper = edge->node[UPPER];
@@ -2779,7 +2789,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
 		blocksize = root->fs_info->nodesize;
 		generation = btrfs_node_ptr_generation(upper->eb, slot);
-		eb = read_tree_block(fs_info, bytenr, generation);
+		btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
+		eb = read_tree_block(fs_info, bytenr, generation,
+				     upper->level - 1, &first_key);
 		if (IS_ERR(eb)) {
 			err = PTR_ERR(eb);
 			goto next;
@@ -2944,7 +2956,8 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *eb;
 
 	BUG_ON(block->key_ready);
-	eb = read_tree_block(fs_info, block->bytenr, block->key.offset);
+	eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
+			     block->level, NULL);
 	if (IS_ERR(eb)) {
 		return PTR_ERR(eb);
 	} else if (!extent_buffer_uptodate(eb)) {
@@ -3226,7 +3239,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
 						   mask);
 			if (!page) {
 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
-							PAGE_SIZE);
+							PAGE_SIZE, true);
 				ret = -ENOMEM;
 				goto out;
 			}
@@ -3245,9 +3258,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
 				unlock_page(page);
 				put_page(page);
 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
-							PAGE_SIZE);
+							PAGE_SIZE, true);
 				btrfs_delalloc_release_extents(BTRFS_I(inode),
-							       PAGE_SIZE);
+							       PAGE_SIZE, true);
 				ret = -EIO;
 				goto out;
 			}
@@ -3274,9 +3287,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
 			unlock_page(page);
 			put_page(page);
 			btrfs_delalloc_release_metadata(BTRFS_I(inode),
-							 PAGE_SIZE);
+							 PAGE_SIZE, true);
 			btrfs_delalloc_release_extents(BTRFS_I(inode),
-			                               PAGE_SIZE);
+			                               PAGE_SIZE, true);
 
 			clear_extent_bits(&BTRFS_I(inode)->io_tree,
 					  page_start, page_end,
@@ -3292,7 +3305,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
 		put_page(page);
 
 		index++;
-		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
+					       false);
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		btrfs_throttle(fs_info);
 	}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ec56f33feea9..1a2066ac6fe7 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -371,7 +371,7 @@ static struct full_stripe_lock *insert_full_stripe_lock(
 	struct full_stripe_lock *entry;
 	struct full_stripe_lock *ret;
 
-	WARN_ON(!mutex_is_locked(&locks_root->lock));
+	lockdep_assert_held(&locks_root->lock);
 
 	p = &locks_root->root.rb_node;
 	while (*p) {
@@ -413,7 +413,7 @@ static struct full_stripe_lock *search_full_stripe_lock(
 	struct rb_node *node;
 	struct full_stripe_lock *entry;
 
-	WARN_ON(!mutex_is_locked(&locks_root->lock));
+	lockdep_assert_held(&locks_root->lock);
 
 	node = locks_root->root.rb_node;
 	while (node) {
@@ -1111,7 +1111,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	struct scrub_ctx *sctx = sblock_to_check->sctx;
 	struct btrfs_device *dev;
 	struct btrfs_fs_info *fs_info;
-	u64 length;
 	u64 logical;
 	unsigned int failed_mirror_index;
 	unsigned int is_metadata;
@@ -1139,7 +1138,6 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 		spin_unlock(&sctx->stat_lock);
 		return 0;
 	}
-	length = sblock_to_check->page_count * PAGE_SIZE;
 	logical = sblock_to_check->pagev[0]->logical;
 	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
 	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
@@ -1412,8 +1410,17 @@ nodatasum_case:
 		if (!page_bad->io_error && !sctx->is_dev_replace)
 			continue;
 
-		/* try to find no-io-error page in mirrors */
-		if (page_bad->io_error) {
+		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
+			/*
+			 * In case of dev replace, if raid56 rebuild process
+			 * didn't work out correct data, then copy the content
+			 * in sblock_bad to make sure target device is identical
+			 * to source device, instead of writing garbage data in
+			 * sblock_for_recheck array to target device.
+			 */
+			sblock_other = NULL;
+		} else if (page_bad->io_error) {
+			/* try to find no-io-error page in mirrors */
 			for (mirror_index = 0;
 			     mirror_index < BTRFS_MAX_MIRRORS &&
 			     sblocks_for_recheck[mirror_index].page_count > 0;
@@ -1718,6 +1725,45 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
 	return blk_status_to_errno(bio->bi_status);
 }
 
+static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
+					  struct scrub_block *sblock)
+{
+	struct scrub_page *first_page = sblock->pagev[0];
+	struct bio *bio;
+	int page_num;
+
+	/* All pages in sblock belong to the same stripe on the same device. */
+	ASSERT(first_page->dev);
+	if (!first_page->dev->bdev)
+		goto out;
+
+	bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
+	bio_set_dev(bio, first_page->dev->bdev);
+
+	for (page_num = 0; page_num < sblock->page_count; page_num++) {
+		struct scrub_page *page = sblock->pagev[page_num];
+
+		WARN_ON(!page->page);
+		bio_add_page(bio, page->page, PAGE_SIZE, 0);
+	}
+
+	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
+		bio_put(bio);
+		goto out;
+	}
+
+	bio_put(bio);
+
+	scrub_recheck_block_checksum(sblock);
+
+	return;
+out:
+	for (page_num = 0; page_num < sblock->page_count; page_num++)
+		sblock->pagev[page_num]->io_error = 1;
+
+	sblock->no_io_error_seen = 0;
+}
+
 /*
  * this function will check the on disk data for checksum errors, header
  * errors and read I/O errors. If any I/O errors happen, the exact pages
@@ -1733,6 +1779,10 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 
 	sblock->no_io_error_seen = 1;
 
+	/* short cut for raid56 */
+	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
+		return scrub_recheck_block_on_raid56(fs_info, sblock);
+
 	for (page_num = 0; page_num < sblock->page_count; page_num++) {
 		struct bio *bio;
 		struct scrub_page *page = sblock->pagev[page_num];
@@ -1748,19 +1798,12 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 		bio_set_dev(bio, page->dev->bdev);
 
 		bio_add_page(bio, page->page, PAGE_SIZE, 0);
-		if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
-			if (scrub_submit_raid56_bio_wait(fs_info, bio, page)) {
-				page->io_error = 1;
-				sblock->no_io_error_seen = 0;
-			}
-		} else {
-			bio->bi_iter.bi_sector = page->physical >> 9;
-			bio_set_op_attrs(bio, REQ_OP_READ, 0);
+		bio->bi_iter.bi_sector = page->physical >> 9;
+		bio->bi_opf = REQ_OP_READ;
 
-			if (btrfsic_submit_bio_wait(bio)) {
-				page->io_error = 1;
-				sblock->no_io_error_seen = 0;
-			}
+		if (btrfsic_submit_bio_wait(bio)) {
+			page->io_error = 1;
+			sblock->no_io_error_seen = 0;
 		}
 
 		bio_put(bio);
@@ -2728,7 +2771,8 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
 }
 
 /* scrub extent tries to collect up to 64 kB for each bio */
-static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
+static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
+			u64 logical, u64 len,
 			u64 physical, struct btrfs_device *dev, u64 flags,
 			u64 gen, int mirror_num, u64 physical_for_dev_replace)
 {
@@ -2737,13 +2781,19 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
 	u32 blocksize;
 
 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
-		blocksize = sctx->fs_info->sectorsize;
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+			blocksize = map->stripe_len;
+		else
+			blocksize = sctx->fs_info->sectorsize;
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.data_extents_scrubbed++;
 		sctx->stat.data_bytes_scrubbed += len;
 		spin_unlock(&sctx->stat_lock);
 	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-		blocksize = sctx->fs_info->nodesize;
+		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
+			blocksize = map->stripe_len;
+		else
+			blocksize = sctx->fs_info->nodesize;
 		spin_lock(&sctx->stat_lock);
 		sctx->stat.tree_extents_scrubbed++;
 		sctx->stat.tree_bytes_scrubbed += len;
@@ -2883,9 +2933,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
 	}
 
 	if (flags & BTRFS_EXTENT_FLAG_DATA) {
-		blocksize = sctx->fs_info->sectorsize;
+		blocksize = sparity->stripe_len;
 	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
-		blocksize = sctx->fs_info->nodesize;
+		blocksize = sparity->stripe_len;
 	} else {
 		blocksize = sctx->fs_info->sectorsize;
 		WARN_ON(1);
@@ -3595,7 +3645,7 @@ again:
 			if (ret)
 				goto out;
 
-			ret = scrub_extent(sctx, extent_logical, extent_len,
+			ret = scrub_extent(sctx, map, extent_logical, extent_len,
 					   extent_physical, extent_dev, flags,
 					   generation, extent_mirror_num,
 					   extent_logical - logical + physical);
@@ -3885,11 +3935,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 			break;
 		}
 
-		btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+		btrfs_dev_replace_write_lock(&fs_info->dev_replace);
 		dev_replace->cursor_right = found_key.offset + length;
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
-		btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+		btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
 		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
 				  found_key.offset, cache, is_dev_replace);
 
@@ -3925,10 +3975,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
 		scrub_pause_off(fs_info);
 
-		btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+		btrfs_dev_replace_write_lock(&fs_info->dev_replace);
 		dev_replace->cursor_left = dev_replace->cursor_right;
 		dev_replace->item_needs_writeback = 1;
-		btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+		btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
 
 		if (ro_set)
 			btrfs_dec_block_group_ro(cache);
@@ -4144,16 +4194,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		return -EIO;
 	}
 
-	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	if (dev->scrub_ctx ||
 	    (!is_dev_replace &&
 	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
-		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 		mutex_unlock(&fs_info->scrub_lock);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 		return -EINPROGRESS;
 	}
-	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 
 	ret = scrub_workers_get(fs_info, is_dev_replace);
 	if (ret) {
@@ -4480,7 +4530,8 @@ static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
 	 * move on to the next inode.
 	 */
 	if (em->block_start > logical ||
-	    em->block_start + em->block_len < logical + len) {
+	    em->block_start + em->block_len < logical + len ||
+	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
 		free_extent_map(em);
 		ret = 1;
 		goto out_unlock;
@@ -4620,7 +4671,6 @@ static int write_page_nocow(struct scrub_ctx *sctx,
 {
 	struct bio *bio;
 	struct btrfs_device *dev;
-	int ret;
 
 	dev = sctx->wr_tgtdev;
 	if (!dev)
@@ -4635,17 +4685,15 @@ static int write_page_nocow(struct scrub_ctx *sctx,
 	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
 	bio_set_dev(bio, dev->bdev);
 	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
-	ret = bio_add_page(bio, page, PAGE_SIZE, 0);
-	if (ret != PAGE_SIZE) {
-leave_with_eio:
+	/* bio_add_page won't fail on a freshly allocated bio */
+	bio_add_page(bio, page, PAGE_SIZE, 0);
+
+	if (btrfsic_submit_bio_wait(bio)) {
 		bio_put(bio);
 		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
 		return -EIO;
 	}
 
-	if (btrfsic_submit_bio_wait(bio))
-		goto leave_with_eio;
-
 	bio_put(bio);
 	return 0;
 }
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 484e2af793de..1f5748c7d1c7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -27,10 +27,10 @@
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 #include <linux/compat.h>
+#include <linux/crc32c.h>
 
 #include "send.h"
 #include "backref.h"
-#include "hash.h"
 #include "locking.h"
 #include "disk-io.h"
 #include "btrfs_inode.h"
@@ -112,6 +112,7 @@ struct send_ctx {
 	u64 cur_inode_mode;
 	u64 cur_inode_rdev;
 	u64 cur_inode_last_extent;
+	u64 cur_inode_next_write_offset;
 
 	u64 send_progress;
 
@@ -270,6 +271,7 @@ struct name_cache_entry {
 	char name[];
 };
 
+__cold
 static void inconsistent_snapshot_error(struct send_ctx *sctx,
 					enum btrfs_compare_tree_result result,
 					const char *what)
@@ -611,9 +613,9 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 }
 
 
-#define TLV_PUT(sctx, attrtype, attrlen, data) \
+#define TLV_PUT(sctx, attrtype, data, attrlen) \
 	do { \
-		ret = tlv_put(sctx, attrtype, attrlen, data); \
+		ret = tlv_put(sctx, attrtype, data, attrlen); \
 		if (ret < 0) \
 			goto tlv_put_failure; \
 	} while (0)
@@ -695,7 +697,7 @@ static int send_cmd(struct send_ctx *sctx)
 	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 	hdr->crc = 0;
 
-	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
+	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 	hdr->crc = cpu_to_le32(crc);
 
 	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
@@ -5029,6 +5031,7 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 			break;
 		offset += len;
 	}
+	sctx->cur_inode_next_write_offset = offset;
 tlv_put_failure:
 	fs_path_free(p);
 	return ret;
@@ -5264,6 +5267,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 	} else {
 		ret = send_extent_data(sctx, offset, len);
 	}
+	sctx->cur_inode_next_write_offset = offset + len;
 out:
 	return ret;
 }
@@ -5788,6 +5792,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	u64 right_gid;
 	int need_chmod = 0;
 	int need_chown = 0;
+	int need_truncate = 1;
 	int pending_move = 0;
 	int refs_processed = 0;
 
@@ -5825,9 +5830,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 		need_chown = 1;
 		if (!S_ISLNK(sctx->cur_inode_mode))
 			need_chmod = 1;
+		if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
+			need_truncate = 0;
 	} else {
+		u64 old_size;
+
 		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
-				NULL, NULL, &right_mode, &right_uid,
+				&old_size, NULL, &right_mode, &right_uid,
 				&right_gid, NULL);
 		if (ret < 0)
 			goto out;
@@ -5836,6 +5845,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 			need_chown = 1;
 		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
 			need_chmod = 1;
+		if ((old_size == sctx->cur_inode_size) ||
+		    (sctx->cur_inode_size > old_size &&
+		     sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
+			need_truncate = 0;
 	}
 
 	if (S_ISREG(sctx->cur_inode_mode)) {
@@ -5854,10 +5867,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 					goto out;
 			}
 		}
-		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
-				sctx->cur_inode_size);
-		if (ret < 0)
-			goto out;
+		if (need_truncate) {
+			ret = send_truncate(sctx, sctx->cur_ino,
+					    sctx->cur_inode_gen,
+					    sctx->cur_inode_size);
+			if (ret < 0)
+				goto out;
+		}
 	}
 
 	if (need_chown) {
@@ -5911,6 +5927,7 @@ static int changed_inode(struct send_ctx *sctx,
 	sctx->cur_ino = key->objectid;
 	sctx->cur_inode_new_gen = 0;
 	sctx->cur_inode_last_extent = (u64)-1;
+	sctx->cur_inode_next_write_offset = 0;
 
 	/*
 	 * Set send_progress to current inode. This will tell all get_cur_xxx
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4b817947e00f..170baef49fae 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/cleancache.h>
 #include <linux/ratelimit.h>
+#include <linux/crc32c.h>
 #include <linux/btrfs.h>
 #include "delayed-inode.h"
 #include "ctree.h"
@@ -48,7 +49,6 @@
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "print-tree.h"
-#include "hash.h"
 #include "props.h"
 #include "xattr.h"
 #include "volumes.h"
@@ -308,21 +308,50 @@ static void btrfs_put_super(struct super_block *sb)
 }
 
 enum {
-	Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
-	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
-	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
-	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
-	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-	Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
-	Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
-	Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
-	Opt_skip_balance, Opt_check_integrity,
+	Opt_acl, Opt_noacl,
+	Opt_clear_cache,
+	Opt_commit_interval,
+	Opt_compress,
+	Opt_compress_force,
+	Opt_compress_force_type,
+	Opt_compress_type,
+	Opt_degraded,
+	Opt_device,
+	Opt_fatal_errors,
+	Opt_flushoncommit, Opt_noflushoncommit,
+	Opt_inode_cache, Opt_noinode_cache,
+	Opt_max_inline,
+	Opt_barrier, Opt_nobarrier,
+	Opt_datacow, Opt_nodatacow,
+	Opt_datasum, Opt_nodatasum,
+	Opt_defrag, Opt_nodefrag,
+	Opt_discard, Opt_nodiscard,
+	Opt_nologreplay,
+	Opt_norecovery,
+	Opt_ratio,
+	Opt_rescan_uuid_tree,
+	Opt_skip_balance,
+	Opt_space_cache, Opt_no_space_cache,
+	Opt_space_cache_version,
+	Opt_ssd, Opt_nossd,
+	Opt_ssd_spread, Opt_nossd_spread,
+	Opt_subvol,
+	Opt_subvolid,
+	Opt_thread_pool,
+	Opt_treelog, Opt_notreelog,
+	Opt_usebackuproot,
+	Opt_user_subvol_rm_allowed,
+
+	/* Deprecated options */
+	Opt_alloc_start,
+	Opt_recovery,
+	Opt_subvolrootid,
+
+	/* Debugging options */
+	Opt_check_integrity,
 	Opt_check_integrity_including_extent_data,
-	Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
-	Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
-	Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
-	Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
-	Opt_nologreplay, Opt_norecovery,
+	Opt_check_integrity_print_mask,
+	Opt_enospc_debug, Opt_noenospc_debug,
 #ifdef CONFIG_BTRFS_DEBUG
 	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
 #endif
@@ -333,58 +362,63 @@ enum {
 };
 
 static const match_table_t tokens = {
-	{Opt_degraded, "degraded"},
-	{Opt_subvol, "subvol=%s"},
-	{Opt_subvolid, "subvolid=%s"},
-	{Opt_device, "device=%s"},
-	{Opt_nodatasum, "nodatasum"},
-	{Opt_datasum, "datasum"},
-	{Opt_nodatacow, "nodatacow"},
-	{Opt_datacow, "datacow"},
-	{Opt_nobarrier, "nobarrier"},
-	{Opt_barrier, "barrier"},
-	{Opt_max_inline, "max_inline=%s"},
-	{Opt_alloc_start, "alloc_start=%s"},
-	{Opt_thread_pool, "thread_pool=%d"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+	{Opt_clear_cache, "clear_cache"},
+	{Opt_commit_interval, "commit=%u"},
 	{Opt_compress, "compress"},
 	{Opt_compress_type, "compress=%s"},
 	{Opt_compress_force, "compress-force"},
 	{Opt_compress_force_type, "compress-force=%s"},
-	{Opt_ssd, "ssd"},
-	{Opt_ssd_spread, "ssd_spread"},
-	{Opt_nossd, "nossd"},
-	{Opt_acl, "acl"},
-	{Opt_noacl, "noacl"},
-	{Opt_notreelog, "notreelog"},
-	{Opt_treelog, "treelog"},
-	{Opt_nologreplay, "nologreplay"},
-	{Opt_norecovery, "norecovery"},
+	{Opt_degraded, "degraded"},
+	{Opt_device, "device=%s"},
+	{Opt_fatal_errors, "fatal_errors=%s"},
 	{Opt_flushoncommit, "flushoncommit"},
 	{Opt_noflushoncommit, "noflushoncommit"},
-	{Opt_ratio, "metadata_ratio=%d"},
+	{Opt_inode_cache, "inode_cache"},
+	{Opt_noinode_cache, "noinode_cache"},
+	{Opt_max_inline, "max_inline=%s"},
+	{Opt_barrier, "barrier"},
+	{Opt_nobarrier, "nobarrier"},
+	{Opt_datacow, "datacow"},
+	{Opt_nodatacow, "nodatacow"},
+	{Opt_datasum, "datasum"},
+	{Opt_nodatasum, "nodatasum"},
+	{Opt_defrag, "autodefrag"},
+	{Opt_nodefrag, "noautodefrag"},
 	{Opt_discard, "discard"},
 	{Opt_nodiscard, "nodiscard"},
+	{Opt_nologreplay, "nologreplay"},
+	{Opt_norecovery, "norecovery"},
+	{Opt_ratio, "metadata_ratio=%u"},
+	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
+	{Opt_skip_balance, "skip_balance"},
 	{Opt_space_cache, "space_cache"},
+	{Opt_no_space_cache, "nospace_cache"},
 	{Opt_space_cache_version, "space_cache=%s"},
-	{Opt_clear_cache, "clear_cache"},
+	{Opt_ssd, "ssd"},
+	{Opt_nossd, "nossd"},
+	{Opt_ssd_spread, "ssd_spread"},
+	{Opt_nossd_spread, "nossd_spread"},
+	{Opt_subvol, "subvol=%s"},
+	{Opt_subvolid, "subvolid=%s"},
+	{Opt_thread_pool, "thread_pool=%u"},
+	{Opt_treelog, "treelog"},
+	{Opt_notreelog, "notreelog"},
+	{Opt_usebackuproot, "usebackuproot"},
 	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
-	{Opt_enospc_debug, "enospc_debug"},
-	{Opt_noenospc_debug, "noenospc_debug"},
+
+	/* Deprecated options */
+	{Opt_alloc_start, "alloc_start=%s"},
+	{Opt_recovery, "recovery"},
 	{Opt_subvolrootid, "subvolrootid=%d"},
-	{Opt_defrag, "autodefrag"},
-	{Opt_nodefrag, "noautodefrag"},
-	{Opt_inode_cache, "inode_cache"},
-	{Opt_noinode_cache, "noinode_cache"},
-	{Opt_no_space_cache, "nospace_cache"},
-	{Opt_recovery, "recovery"}, /* deprecated */
-	{Opt_usebackuproot, "usebackuproot"},
-	{Opt_skip_balance, "skip_balance"},
+
+	/* Debugging options */
 	{Opt_check_integrity, "check_int"},
 	{Opt_check_integrity_including_extent_data, "check_int_data"},
-	{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
-	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
-	{Opt_fatal_errors, "fatal_errors=%s"},
-	{Opt_commit_interval, "commit=%d"},
+	{Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
+	{Opt_enospc_debug, "enospc_debug"},
+	{Opt_noenospc_debug, "noenospc_debug"},
 #ifdef CONFIG_BTRFS_DEBUG
 	{Opt_fragment_data, "fragment=data"},
 	{Opt_fragment_metadata, "fragment=metadata"},
@@ -579,6 +613,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			btrfs_set_opt(info->mount_opt, NOSSD);
 			btrfs_clear_and_info(info, SSD,
 					     "not using ssd optimizations");
+			/* Fallthrough */
+		case Opt_nossd_spread:
 			btrfs_clear_and_info(info, SSD_SPREAD,
 					     "not using spread ssd allocation scheme");
 			break;
@@ -594,12 +630,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			ret = match_int(&args[0], &intarg);
 			if (ret) {
 				goto out;
-			} else if (intarg > 0) {
-				info->thread_pool_size = intarg;
-			} else {
+			} else if (intarg == 0) {
 				ret = -EINVAL;
 				goto out;
 			}
+			info->thread_pool_size = intarg;
 			break;
 		case Opt_max_inline:
 			num = match_strdup(&args[0]);
@@ -658,16 +693,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 		case Opt_ratio:
 			ret = match_int(&args[0], &intarg);
-			if (ret) {
+			if (ret)
 				goto out;
-			} else if (intarg >= 0) {
-				info->metadata_ratio = intarg;
-				btrfs_info(info, "metadata ratio %d",
-					   info->metadata_ratio);
-			} else {
-				ret = -EINVAL;
-				goto out;
-			}
+			info->metadata_ratio = intarg;
+			btrfs_info(info, "metadata ratio %u",
+				   info->metadata_ratio);
 			break;
 		case Opt_discard:
 			btrfs_set_and_info(info, DISCARD,
@@ -762,17 +792,11 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 		case Opt_check_integrity_print_mask:
 			ret = match_int(&args[0], &intarg);
-			if (ret) {
+			if (ret)
 				goto out;
-			} else if (intarg >= 0) {
-				info->check_integrity_print_mask = intarg;
-				btrfs_info(info,
-					   "check_integrity_print_mask 0x%x",
-					   info->check_integrity_print_mask);
-			} else {
-				ret = -EINVAL;
-				goto out;
-			}
+			info->check_integrity_print_mask = intarg;
+			btrfs_info(info, "check_integrity_print_mask 0x%x",
+				   info->check_integrity_print_mask);
 			break;
 #else
 		case Opt_check_integrity_including_extent_data:
@@ -798,24 +822,18 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		case Opt_commit_interval:
 			intarg = 0;
 			ret = match_int(&args[0], &intarg);
-			if (ret < 0) {
-				btrfs_err(info, "invalid commit interval");
-				ret = -EINVAL;
+			if (ret)
 				goto out;
-			}
-			if (intarg > 0) {
-				if (intarg > 300) {
-					btrfs_warn(info,
-						"excessive commit interval %d",
-						intarg);
-				}
-				info->commit_interval = intarg;
-			} else {
+			if (intarg == 0) {
 				btrfs_info(info,
-					   "using default commit interval %ds",
+					   "using default commit interval %us",
 					   BTRFS_DEFAULT_COMMIT_INTERVAL);
-				info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
+				intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
+			} else if (intarg > 300) {
+				btrfs_warn(info, "excessive commit interval %d",
+					   intarg);
 			}
+			info->commit_interval = intarg;
 			break;
 #ifdef CONFIG_BTRFS_DEBUG
 		case Opt_fragment_all:
@@ -932,8 +950,8 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
 {
 	substring_t args[MAX_OPT_ARGS];
 	char *opts, *orig, *p;
-	char *num = NULL;
 	int error = 0;
+	u64 subvolid;
 
 	if (!options)
 		return 0;
@@ -963,18 +981,15 @@ static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
 			}
 			break;
 		case Opt_subvolid:
-			num = match_strdup(&args[0]);
-			if (num) {
-				*subvol_objectid = memparse(num, NULL);
-				kfree(num);
-				/* we want the original fs_tree */
-				if (!*subvol_objectid)
-					*subvol_objectid =
-						BTRFS_FS_TREE_OBJECTID;
-			} else {
-				error = -EINVAL;
+			error = match_u64(&args[0], &subvolid);
+			if (error)
 				goto out;
-			}
+
+			/* we want the original fs_tree */
+			if (subvolid == 0)
+				subvolid = BTRFS_FS_TREE_OBJECTID;
+
+			*subvol_objectid = subvolid;
 			break;
 		case Opt_subvolrootid:
 			pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
@@ -1284,7 +1299,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_printf(seq, ",max_inline=%llu", info->max_inline);
 	if (info->thread_pool_size !=  min_t(unsigned long,
 					     num_online_cpus() + 2, 8))
-		seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
+		seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
 	if (btrfs_test_opt(info, COMPRESS)) {
 		compress_type = btrfs_compress_type2str(info->compress_type);
 		if (btrfs_test_opt(info, FORCE_COMPRESS))
@@ -1340,12 +1355,11 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 				info->check_integrity_print_mask);
 #endif
 	if (info->metadata_ratio)
-		seq_printf(seq, ",metadata_ratio=%d",
-				info->metadata_ratio);
+		seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
 	if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
 		seq_puts(seq, ",fatal_errors=panic");
 	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
-		seq_printf(seq, ",commit=%d", info->commit_interval);
+		seq_printf(seq, ",commit=%u", info->commit_interval);
 #ifdef CONFIG_BTRFS_DEBUG
 	if (btrfs_test_opt(info, FRAGMENT_DATA))
 		seq_puts(seq, ",fragment=data");
@@ -1690,7 +1704,7 @@ out:
 }
 
 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
-				     int new_pool_size, int old_pool_size)
+				     u32 new_pool_size, u32 old_pool_size)
 {
 	if (new_pool_size == old_pool_size)
 		return;
@@ -1758,8 +1772,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	unsigned long old_opts = fs_info->mount_opt;
 	unsigned long old_compress_type = fs_info->compress_type;
 	u64 old_max_inline = fs_info->max_inline;
-	int old_thread_pool_size = fs_info->thread_pool_size;
-	unsigned int old_metadata_ratio = fs_info->metadata_ratio;
+	u32 old_thread_pool_size = fs_info->thread_pool_size;
+	u32 old_metadata_ratio = fs_info->metadata_ratio;
 	int ret;
 
 	sync_filesystem(sb);
@@ -2290,11 +2304,18 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	struct list_head *head;
 	struct rcu_string *name;
 
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	/*
+	 * Lightweight locking of the devices. We should not need
+	 * device_list_mutex here as we only read the device data and the list
+	 * is protected by RCU.  Even if a device is deleted during the list
+	 * traversals, we'll get valid data, the freeing callback will wait at
+	 * least until until the rcu_read_unlock.
+	 */
+	rcu_read_lock();
 	cur_devices = fs_info->fs_devices;
 	while (cur_devices) {
 		head = &cur_devices->devices;
-		list_for_each_entry(dev, head, dev_list) {
+		list_for_each_entry_rcu(dev, head, dev_list) {
 			if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
 				continue;
 			if (!dev->name)
@@ -2306,14 +2327,12 @@ static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 	}
 
 	if (first_dev) {
-		rcu_read_lock();
 		name = rcu_dereference(first_dev->name);
 		seq_escape(m, name->str, " \t\n\\");
-		rcu_read_unlock();
 	} else {
 		WARN_ON(1);
 	}
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	rcu_read_unlock();
 	return 0;
 }
 
@@ -2355,7 +2374,7 @@ static int __init btrfs_interface_init(void)
 	return misc_register(&btrfs_misc);
 }
 
-static void btrfs_interface_exit(void)
+static __cold void btrfs_interface_exit(void)
 {
 	misc_deregister(&btrfs_misc);
 }
@@ -2376,22 +2395,18 @@ static void __init btrfs_print_mod_info(void)
 			", ref-verify=on"
 #endif
 			"\n",
-			btrfs_crc32c_impl());
+			crc32c_impl());
 }
 
 static int __init init_btrfs_fs(void)
 {
 	int err;
 
-	err = btrfs_hash_init();
-	if (err)
-		return err;
-
 	btrfs_props_init();
 
 	err = btrfs_init_sysfs();
 	if (err)
-		goto free_hash;
+		return err;
 
 	btrfs_init_compress();
 
@@ -2472,8 +2487,7 @@ free_cachep:
 free_compress:
 	btrfs_exit_compress();
 	btrfs_exit_sysfs();
-free_hash:
-	btrfs_hash_exit();
+
 	return err;
 }
 
@@ -2493,7 +2507,6 @@ static void __exit exit_btrfs_fs(void)
 	btrfs_exit_sysfs();
 	btrfs_cleanup_fs_uuids();
 	btrfs_exit_compress();
-	btrfs_hash_exit();
 }
 
 late_initcall(init_btrfs_fs);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index a8bafed931f4..ca067471cd46 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -272,7 +272,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 {
 	struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
 	struct btrfs_block_group_cache *block_group;
-	int index = to_raid_kobj(kobj)->raid_type;
+	int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
 	u64 val = 0;
 
 	down_read(&sinfo->groups_sem);
@@ -923,7 +923,7 @@ out1:
 	return ret;
 }
 
-void btrfs_exit_sysfs(void)
+void __cold btrfs_exit_sysfs(void)
 {
 	sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
 	kset_unregister(btrfs_kset);
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 9786d8cd0aa6..e74278170806 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -278,8 +278,7 @@ int btrfs_run_sanity_tests(void)
 		}
 	}
 	ret = btrfs_test_extent_map();
-	if (ret)
-		goto out;
+
 out:
 	btrfs_destroy_test_fs();
 	return ret;
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 70c993f01670..c23bd00bdd92 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -343,7 +343,7 @@ static void test_case_4(struct extent_map_tree *em_tree)
 	__test_case_4(em_tree, SZ_4K);
 }
 
-int btrfs_test_extent_map()
+int btrfs_test_extent_map(void)
 {
 	struct extent_map_tree *em_tree;
 
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 90204b166643..160eb2fba726 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -63,7 +63,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
 	btrfs_set_extent_generation(leaf, item, 1);
 	btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_TREE_BLOCK);
 	block_info = (struct btrfs_tree_block_info *)(item + 1);
-	btrfs_set_tree_block_level(leaf, block_info, 1);
+	btrfs_set_tree_block_level(leaf, block_info, 0);
 	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
 	if (parent > 0) {
 		btrfs_set_extent_inline_ref_type(leaf, iref,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 04f07144b45c..5c4cf0f9146b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -37,22 +37,16 @@
 
 static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
 	[TRANS_STATE_RUNNING]		= 0U,
-	[TRANS_STATE_BLOCKED]		= (__TRANS_USERSPACE |
-					   __TRANS_START),
-	[TRANS_STATE_COMMIT_START]	= (__TRANS_USERSPACE |
-					   __TRANS_START |
-					   __TRANS_ATTACH),
-	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_USERSPACE |
-					   __TRANS_START |
+	[TRANS_STATE_BLOCKED]		=  __TRANS_START,
+	[TRANS_STATE_COMMIT_START]	= (__TRANS_START | __TRANS_ATTACH),
+	[TRANS_STATE_COMMIT_DOING]	= (__TRANS_START |
 					   __TRANS_ATTACH |
 					   __TRANS_JOIN),
-	[TRANS_STATE_UNBLOCKED]		= (__TRANS_USERSPACE |
-					   __TRANS_START |
+	[TRANS_STATE_UNBLOCKED]		= (__TRANS_START |
 					   __TRANS_ATTACH |
 					   __TRANS_JOIN |
 					   __TRANS_JOIN_NOLOCK),
-	[TRANS_STATE_COMPLETED]		= (__TRANS_USERSPACE |
-					   __TRANS_START |
+	[TRANS_STATE_COMPLETED]		= (__TRANS_START |
 					   __TRANS_ATTACH |
 					   __TRANS_JOIN |
 					   __TRANS_JOIN_NOLOCK),
@@ -126,9 +120,9 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
 	spin_unlock(&tree->lock);
 }
 
-static noinline void switch_commit_roots(struct btrfs_transaction *trans,
-					 struct btrfs_fs_info *fs_info)
+static noinline void switch_commit_roots(struct btrfs_transaction *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root, *tmp;
 
 	down_write(&fs_info->commit_root_sem);
@@ -319,7 +313,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
 	if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
 	    root->last_trans < trans->transid) || force) {
 		WARN_ON(root == fs_info->extent_root);
-		WARN_ON(root->commit_root != root->node);
+		WARN_ON(!force && root->commit_root != root->node);
 
 		/*
 		 * see below for IN_TRANS_SETUP usage rules
@@ -449,11 +443,7 @@ static int may_wait_transaction(struct btrfs_fs_info *fs_info, int type)
 	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
 		return 0;
 
-	if (type == TRANS_USERSPACE)
-		return 1;
-
-	if (type == TRANS_START &&
-	    !atomic_read(&fs_info->open_ioctl_trans))
+	if (type == TRANS_START)
 		return 1;
 
 	return 0;
@@ -508,8 +498,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
 	 */
 	if (num_items && root != fs_info->chunk_root) {
 		qgroup_reserved = num_items * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta(root, qgroup_reserved,
-						enforce_qgroups);
+		ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
+				enforce_qgroups);
 		if (ret)
 			return ERR_PTR(ret);
 
@@ -593,7 +583,7 @@ again:
 got_it:
 	btrfs_record_root_in_trans(h, root);
 
-	if (!current->journal_info && type != TRANS_USERSPACE)
+	if (!current->journal_info)
 		current->journal_info = h;
 	return h;
 
@@ -606,7 +596,7 @@ alloc_fail:
 		btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
 					num_bytes);
 reserve_fail:
-	btrfs_qgroup_free_meta(root, qgroup_reserved);
+	btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
 	return ERR_PTR(ret);
 }
 
@@ -658,14 +648,6 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
 	return trans;
 }
 
-struct btrfs_trans_handle *btrfs_start_transaction_lflush(
-					struct btrfs_root *root,
-					unsigned int num_items)
-{
-	return start_transaction(root, num_items, TRANS_START,
-				 BTRFS_RESERVE_FLUSH_LIMIT, true);
-}
-
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
 {
 	return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH,
@@ -678,12 +660,6 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
 				 BTRFS_RESERVE_NO_FLUSH, true);
 }
 
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
-{
-	return start_transaction(root, 0, TRANS_USERSPACE,
-				 BTRFS_RESERVE_NO_FLUSH, true);
-}
-
 /*
  * btrfs_attach_transaction() - catch the running transaction
  *
@@ -789,8 +765,7 @@ out:
 
 void btrfs_throttle(struct btrfs_fs_info *fs_info)
 {
-	if (!atomic_read(&fs_info->open_ioctl_trans))
-		wait_current_trans(fs_info);
+	wait_current_trans(fs_info);
 }
 
 static int should_end_transaction(struct btrfs_trans_handle *trans)
@@ -806,7 +781,6 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_transaction *cur_trans = trans->transaction;
-	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int updates;
 	int err;
 
@@ -818,7 +792,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
 	updates = trans->delayed_ref_updates;
 	trans->delayed_ref_updates = 0;
 	if (updates) {
-		err = btrfs_run_delayed_refs(trans, fs_info, updates * 2);
+		err = btrfs_run_delayed_refs(trans, updates * 2);
 		if (err) /* Error code will also eval true */
 			return err;
 	}
@@ -826,6 +800,27 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
 	return should_end_transaction(trans);
 }
 
+static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
+
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+
+	if (!trans->block_rsv) {
+		ASSERT(!trans->bytes_reserved);
+		return;
+	}
+
+	if (!trans->bytes_reserved)
+		return;
+
+	ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
+	trace_btrfs_space_reservation(fs_info, "transaction",
+				      trans->transid, trans->bytes_reserved, 0);
+	btrfs_block_rsv_release(fs_info, trans->block_rsv,
+				trans->bytes_reserved);
+	trans->bytes_reserved = 0;
+}
+
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 				   int throttle)
 {
@@ -843,11 +838,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 		return 0;
 	}
 
-	btrfs_trans_release_metadata(trans, info);
+	btrfs_trans_release_metadata(trans);
 	trans->block_rsv = NULL;
 
 	if (!list_empty(&trans->new_bgs))
-		btrfs_create_pending_block_groups(trans, info);
+		btrfs_create_pending_block_groups(trans);
 
 	trans->delayed_ref_updates = 0;
 	if (!trans->sync) {
@@ -864,16 +859,15 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			must_run_delayed_refs = 2;
 	}
 
-	btrfs_trans_release_metadata(trans, info);
+	btrfs_trans_release_metadata(trans);
 	trans->block_rsv = NULL;
 
 	if (!list_empty(&trans->new_bgs))
-		btrfs_create_pending_block_groups(trans, info);
+		btrfs_create_pending_block_groups(trans);
 
 	btrfs_trans_release_chunk_metadata(trans);
 
-	if (lock && !atomic_read(&info->open_ioctl_trans) &&
-	    should_end_transaction(trans) &&
+	if (lock && should_end_transaction(trans) &&
 	    READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
 		spin_lock(&info->trans_lock);
 		if (cur_trans->state == TRANS_STATE_RUNNING)
@@ -1072,40 +1066,33 @@ int btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark)
 }
 
 /*
- * when btree blocks are allocated, they have some corresponding bits set for
- * them in one of two extent_io trees.  This is used to make sure all of
- * those extents are on disk for transaction or log commit
+ * When btree blocks are allocated the corresponding extents are marked dirty.
+ * This function ensures such extents are persisted on disk for transaction or
+ * log commit.
+ *
+ * @trans: transaction whose dirty pages we'd like to write
  */
-static int btrfs_write_and_wait_marked_extents(struct btrfs_fs_info *fs_info,
-				struct extent_io_tree *dirty_pages, int mark)
+static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans)
 {
 	int ret;
 	int ret2;
+	struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct blk_plug plug;
 
 	blk_start_plug(&plug);
-	ret = btrfs_write_marked_extents(fs_info, dirty_pages, mark);
+	ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY);
 	blk_finish_plug(&plug);
 	ret2 = btrfs_wait_extents(fs_info, dirty_pages);
 
+	clear_btree_io_tree(&trans->transaction->dirty_pages);
+
 	if (ret)
 		return ret;
-	if (ret2)
+	else if (ret2)
 		return ret2;
-	return 0;
-}
-
-static int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
-					    struct btrfs_fs_info *fs_info)
-{
-	int ret;
-
-	ret = btrfs_write_and_wait_marked_extents(fs_info,
-					   &trans->transaction->dirty_pages,
-					   EXTENT_DIRTY);
-	clear_btree_io_tree(&trans->transaction->dirty_pages);
-
-	return ret;
+	else
+		return 0;
 }
 
 /*
@@ -1155,9 +1142,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
  * failures will cause the file system to go offline. We still need
  * to clean up the delayed refs.
  */
-static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
-					 struct btrfs_fs_info *fs_info)
+static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
 	struct list_head *io_bgs = &trans->transaction->io_bgs;
 	struct list_head *next;
@@ -1173,7 +1160,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 	if (ret)
 		return ret;
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret)
 		return ret;
 
@@ -1192,7 +1179,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 		return ret;
 
 	/* run_qgroups might have added some more refs */
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret)
 		return ret;
 again:
@@ -1209,7 +1196,7 @@ again:
 		ret = update_cowonly_root(trans, root);
 		if (ret)
 			return ret;
-		ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+		ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 		if (ret)
 			return ret;
 	}
@@ -1218,7 +1205,7 @@ again:
 		ret = btrfs_write_dirty_block_groups(trans, fs_info);
 		if (ret)
 			return ret;
-		ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+		ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 		if (ret)
 			return ret;
 	}
@@ -1251,9 +1238,9 @@ void btrfs_add_dead_root(struct btrfs_root *root)
 /*
  * update all the cowonly tree roots on disk
  */
-static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info)
+static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *gang[8];
 	int i;
 	int ret;
@@ -1297,7 +1284,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
 			spin_lock(&fs_info->fs_roots_radix_lock);
 			if (err)
 				break;
-			btrfs_qgroup_free_meta_all(root);
+			btrfs_qgroup_free_meta_all_pertrans(root);
 		}
 	}
 	spin_unlock(&fs_info->fs_roots_radix_lock);
@@ -1366,15 +1353,23 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 		return 0;
 
 	/*
+	 * Ensure dirty @src will be commited.  Or, after comming
+	 * commit_fs_roots() and switch_commit_roots(), any dirty but not
+	 * recorded root will never be updated again, causing an outdated root
+	 * item.
+	 */
+	record_root_in_trans(trans, src, 1);
+
+	/*
 	 * We are going to commit transaction, see btrfs_commit_transaction()
 	 * comment for reason locking tree_log_mutex
 	 */
 	mutex_lock(&fs_info->tree_log_mutex);
 
-	ret = commit_fs_roots(trans, fs_info);
+	ret = commit_fs_roots(trans);
 	if (ret)
 		goto out;
-	ret = btrfs_qgroup_account_extents(trans, fs_info);
+	ret = btrfs_qgroup_account_extents(trans);
 	if (ret < 0)
 		goto out;
 
@@ -1397,11 +1392,11 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 	 * like chunk and root tree, as they won't affect qgroup.
 	 * And we don't write super to avoid half committed status.
 	 */
-	ret = commit_cowonly_roots(trans, fs_info);
+	ret = commit_cowonly_roots(trans);
 	if (ret)
 		goto out;
-	switch_commit_roots(trans->transaction, fs_info);
-	ret = btrfs_write_and_wait_transaction(trans, fs_info);
+	switch_commit_roots(trans->transaction);
+	ret = btrfs_write_and_wait_transaction(trans);
 	if (ret)
 		btrfs_handle_fs_error(fs_info, ret,
 			"Error while writing out transaction for qgroup");
@@ -1430,9 +1425,10 @@ out:
  * the creation of the pending snapshots, just return 0.
  */
 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   struct btrfs_pending_snapshot *pending)
 {
+
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_key key;
 	struct btrfs_root_item *new_root_item;
 	struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1524,7 +1520,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	 * otherwise we corrupt the FS during
 	 * snapshot
 	 */
-	ret = btrfs_run_delayed_items(trans, fs_info);
+	ret = btrfs_run_delayed_items(trans);
 	if (ret) {	/* Transaction aborted */
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1620,7 +1616,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 		goto fail;
 	}
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1674,7 +1670,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
@@ -1699,8 +1695,7 @@ no_free_objectid:
 /*
  * create all the snapshots we've scheduled for creation
  */
-static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
-					     struct btrfs_fs_info *fs_info)
+static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_pending_snapshot *pending, *next;
 	struct list_head *head = &trans->transaction->pending_snapshots;
@@ -1708,7 +1703,7 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 
 	list_for_each_entry_safe(pending, next, head, list) {
 		list_del(&pending->list);
-		ret = create_pending_snapshot(trans, fs_info, pending);
+		ret = create_pending_snapshot(trans, pending);
 		if (ret)
 			break;
 	}
@@ -1861,10 +1856,9 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 }
 
 
-static void cleanup_transaction(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root, int err)
+static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	DEFINE_WAIT(wait);
 
@@ -1904,7 +1898,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
 	btrfs_put_transaction(cur_trans);
 	btrfs_put_transaction(cur_trans);
 
-	trace_btrfs_transaction_commit(root);
+	trace_btrfs_transaction_commit(trans->root);
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
@@ -1959,13 +1953,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	/* make a pass through all the delayed refs we have so far
 	 * any runnings procs may add more while we are here
 	 */
-	ret = btrfs_run_delayed_refs(trans, fs_info, 0);
+	ret = btrfs_run_delayed_refs(trans, 0);
 	if (ret) {
 		btrfs_end_transaction(trans);
 		return ret;
 	}
 
-	btrfs_trans_release_metadata(trans, fs_info);
+	btrfs_trans_release_metadata(trans);
 	trans->block_rsv = NULL;
 
 	cur_trans = trans->transaction;
@@ -1978,9 +1972,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	smp_wmb();
 
 	if (!list_empty(&trans->new_bgs))
-		btrfs_create_pending_block_groups(trans, fs_info);
+		btrfs_create_pending_block_groups(trans);
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, 0);
+	ret = btrfs_run_delayed_refs(trans, 0);
 	if (ret) {
 		btrfs_end_transaction(trans);
 		return ret;
@@ -2008,12 +2002,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 			run_it = 1;
 		mutex_unlock(&fs_info->ro_block_group_mutex);
 
-		if (run_it)
-			ret = btrfs_start_dirty_block_groups(trans, fs_info);
-	}
-	if (ret) {
-		btrfs_end_transaction(trans);
-		return ret;
+		if (run_it) {
+			ret = btrfs_start_dirty_block_groups(trans);
+			if (ret) {
+				btrfs_end_transaction(trans);
+				return ret;
+			}
+		}
 	}
 
 	spin_lock(&fs_info->trans_lock);
@@ -2061,7 +2056,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	if (ret)
 		goto cleanup_transaction;
 
-	ret = btrfs_run_delayed_items(trans, fs_info);
+	ret = btrfs_run_delayed_items(trans);
 	if (ret)
 		goto cleanup_transaction;
 
@@ -2069,7 +2064,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 		   extwriter_counter_read(cur_trans) == 0);
 
 	/* some pending stuffs might be added after the previous flush. */
-	ret = btrfs_run_delayed_items(trans, fs_info);
+	ret = btrfs_run_delayed_items(trans);
 	if (ret)
 		goto cleanup_transaction;
 
@@ -2106,7 +2101,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 * deal with them in create_pending_snapshot(), which is the
 	 * core function of the snapshot creation.
 	 */
-	ret = create_pending_snapshots(trans, fs_info);
+	ret = create_pending_snapshots(trans);
 	if (ret) {
 		mutex_unlock(&fs_info->reloc_mutex);
 		goto scrub_continue;
@@ -2122,13 +2117,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 * because all the tree which are snapshoted will be forced to COW
 	 * the nodes and leaves.
 	 */
-	ret = btrfs_run_delayed_items(trans, fs_info);
+	ret = btrfs_run_delayed_items(trans);
 	if (ret) {
 		mutex_unlock(&fs_info->reloc_mutex);
 		goto scrub_continue;
 	}
 
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret) {
 		mutex_unlock(&fs_info->reloc_mutex);
 		goto scrub_continue;
@@ -2157,7 +2152,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 */
 	mutex_lock(&fs_info->tree_log_mutex);
 
-	ret = commit_fs_roots(trans, fs_info);
+	ret = commit_fs_roots(trans);
 	if (ret) {
 		mutex_unlock(&fs_info->tree_log_mutex);
 		mutex_unlock(&fs_info->reloc_mutex);
@@ -2179,7 +2174,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 * commit_fs_roots() can call btrfs_save_ino_cache(), which generates
 	 * new delayed refs. Must handle them or qgroup can be wrong.
 	 */
-	ret = btrfs_run_delayed_refs(trans, fs_info, (unsigned long)-1);
+	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret) {
 		mutex_unlock(&fs_info->tree_log_mutex);
 		mutex_unlock(&fs_info->reloc_mutex);
@@ -2190,14 +2185,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	 * Since fs roots are all committed, we can get a quite accurate
 	 * new_roots. So let's do quota accounting.
 	 */
-	ret = btrfs_qgroup_account_extents(trans, fs_info);
+	ret = btrfs_qgroup_account_extents(trans);
 	if (ret < 0) {
 		mutex_unlock(&fs_info->tree_log_mutex);
 		mutex_unlock(&fs_info->reloc_mutex);
 		goto scrub_continue;
 	}
 
-	ret = commit_cowonly_roots(trans, fs_info);
+	ret = commit_cowonly_roots(trans);
 	if (ret) {
 		mutex_unlock(&fs_info->tree_log_mutex);
 		mutex_unlock(&fs_info->reloc_mutex);
@@ -2229,7 +2224,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	list_add_tail(&fs_info->chunk_root->dirty_list,
 		      &cur_trans->switch_commits);
 
-	switch_commit_roots(cur_trans, fs_info);
+	switch_commit_roots(cur_trans);
 
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	ASSERT(list_empty(&cur_trans->io_bgs));
@@ -2241,7 +2236,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	       sizeof(*fs_info->super_copy));
 
 	btrfs_update_commit_device_size(fs_info);
-	btrfs_update_commit_device_bytes_used(fs_info, cur_trans);
+	btrfs_update_commit_device_bytes_used(cur_trans);
 
 	clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
 	clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
@@ -2256,7 +2251,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	wake_up(&fs_info->transaction_wait);
 
-	ret = btrfs_write_and_wait_transaction(trans, fs_info);
+	ret = btrfs_write_and_wait_transaction(trans);
 	if (ret) {
 		btrfs_handle_fs_error(fs_info, ret,
 				      "Error while writing out transaction");
@@ -2273,7 +2268,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	if (ret)
 		goto scrub_continue;
 
-	btrfs_finish_extent_commit(trans, fs_info);
+	btrfs_finish_extent_commit(trans);
 
 	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
 		btrfs_clear_space_info_full(fs_info);
@@ -2319,13 +2314,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 scrub_continue:
 	btrfs_scrub_continue(fs_info);
 cleanup_transaction:
-	btrfs_trans_release_metadata(trans, fs_info);
+	btrfs_trans_release_metadata(trans);
 	btrfs_trans_release_chunk_metadata(trans);
 	trans->block_rsv = NULL;
 	btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
-	cleanup_transaction(trans, trans->root, ret);
+	cleanup_transaction(trans, ret);
 
 	return ret;
 }
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 6beee072b1bd..b6c94ce33503 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -69,6 +69,22 @@ struct btrfs_transaction {
 	struct list_head pending_chunks;
 	struct list_head switch_commits;
 	struct list_head dirty_bgs;
+
+	/*
+	 * There is no explicit lock which protects io_bgs, rather its
+	 * consistency is implied by the fact that all the sites which modify
+	 * it do so under some form of transaction critical section, namely:
+	 *
+	 * - btrfs_start_dirty_block_groups - This function can only ever be
+	 *   run by one of the transaction committers. Refer to
+	 *   BTRFS_TRANS_DIRTY_BG_RUN usage in btrfs_commit_transaction
+	 *
+	 * - btrfs_write_dirty_blockgroups - this is called by
+	 *   commit_cowonly_roots from transaction critical section
+	 *   (TRANS_STATE_COMMIT_DOING)
+	 *
+	 * - btrfs_cleanup_dirty_bgs - called on transaction abort
+	 */
 	struct list_head io_bgs;
 	struct list_head dropped_roots;
 
@@ -89,21 +105,18 @@ struct btrfs_transaction {
 
 #define __TRANS_FREEZABLE	(1U << 0)
 
-#define __TRANS_USERSPACE	(1U << 8)
 #define __TRANS_START		(1U << 9)
 #define __TRANS_ATTACH		(1U << 10)
 #define __TRANS_JOIN		(1U << 11)
 #define __TRANS_JOIN_NOLOCK	(1U << 12)
 #define __TRANS_DUMMY		(1U << 13)
 
-#define TRANS_USERSPACE		(__TRANS_USERSPACE | __TRANS_FREEZABLE)
 #define TRANS_START		(__TRANS_START | __TRANS_FREEZABLE)
 #define TRANS_ATTACH		(__TRANS_ATTACH)
 #define TRANS_JOIN		(__TRANS_JOIN | __TRANS_FREEZABLE)
 #define TRANS_JOIN_NOLOCK	(__TRANS_JOIN_NOLOCK)
 
-#define TRANS_EXTWRITERS	(__TRANS_USERSPACE | __TRANS_START |	\
-				 __TRANS_ATTACH)
+#define TRANS_EXTWRITERS	(__TRANS_START | __TRANS_ATTACH)
 
 #define BTRFS_SEND_TRANS_STUB	((void *)1)
 
@@ -186,15 +199,11 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
 					struct btrfs_root *root,
 					unsigned int num_items,
 					int min_factor);
-struct btrfs_trans_handle *btrfs_start_transaction_lflush(
-					struct btrfs_root *root,
-					unsigned int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
 					struct btrfs_root *root);
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
 int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
 
 void btrfs_add_dead_root(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index c3c8d48f6618..8871286c1a91 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -30,7 +30,6 @@
 #include "tree-checker.h"
 #include "disk-io.h"
 #include "compression.h"
-#include "hash.h"
 
 /*
  * Error message should follow the following format:
@@ -53,7 +52,8 @@
  * Allows callers to customize the output.
  */
 __printf(4, 5)
-static void generic_err(const struct btrfs_root *root,
+__cold
+static void generic_err(const struct btrfs_fs_info *fs_info,
 			const struct extent_buffer *eb, int slot,
 			const char *fmt, ...)
 {
@@ -65,10 +65,10 @@ static void generic_err(const struct btrfs_root *root,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	btrfs_crit(root->fs_info,
+	btrfs_crit(fs_info,
 		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
-		root->objectid, btrfs_header_bytenr(eb), slot, &vaf);
+		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
 	va_end(args);
 }
 
@@ -77,7 +77,8 @@ static void generic_err(const struct btrfs_root *root,
  * offset has its own meaning.
  */
 __printf(4, 5)
-static void file_extent_err(const struct btrfs_root *root,
+__cold
+static void file_extent_err(const struct btrfs_fs_info *fs_info,
 			    const struct extent_buffer *eb, int slot,
 			    const char *fmt, ...)
 {
@@ -91,10 +92,11 @@ static void file_extent_err(const struct btrfs_root *root,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	btrfs_crit(root->fs_info,
+	btrfs_crit(fs_info,
 	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
-		btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
-		btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf);
+		btrfs_header_level(eb) == 0 ? "leaf" : "node",
+		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+		key.objectid, key.offset, &vaf);
 	va_end(args);
 }
 
@@ -102,26 +104,26 @@ static void file_extent_err(const struct btrfs_root *root,
  * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
  * Else return 1
  */
-#define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment)		      \
+#define CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, name, alignment)	      \
 ({									      \
 	if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
-		file_extent_err((root), (leaf), (slot),			      \
+		file_extent_err((fs_info), (leaf), (slot),		      \
 	"invalid %s for file extent, have %llu, should be aligned to %u",     \
 			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
 			(alignment));					      \
 	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
 })
 
-static int check_extent_data_item(struct btrfs_root *root,
+static int check_extent_data_item(struct btrfs_fs_info *fs_info,
 				  struct extent_buffer *leaf,
 				  struct btrfs_key *key, int slot)
 {
 	struct btrfs_file_extent_item *fi;
-	u32 sectorsize = root->fs_info->sectorsize;
+	u32 sectorsize = fs_info->sectorsize;
 	u32 item_size = btrfs_item_size_nr(leaf, slot);
 
 	if (!IS_ALIGNED(key->offset, sectorsize)) {
-		file_extent_err(root, leaf, slot,
+		file_extent_err(fs_info, leaf, slot,
 "unaligned file_offset for file extent, have %llu should be aligned to %u",
 			key->offset, sectorsize);
 		return -EUCLEAN;
@@ -130,7 +132,7 @@ static int check_extent_data_item(struct btrfs_root *root,
 	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 
 	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
-		file_extent_err(root, leaf, slot,
+		file_extent_err(fs_info, leaf, slot,
 		"invalid type for file extent, have %u expect range [0, %u]",
 			btrfs_file_extent_type(leaf, fi),
 			BTRFS_FILE_EXTENT_TYPES);
@@ -142,14 +144,14 @@ static int check_extent_data_item(struct btrfs_root *root,
 	 * and must be caught in open_ctree().
 	 */
 	if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
-		file_extent_err(root, leaf, slot,
+		file_extent_err(fs_info, leaf, slot,
 	"invalid compression for file extent, have %u expect range [0, %u]",
 			btrfs_file_extent_compression(leaf, fi),
 			BTRFS_COMPRESS_TYPES);
 		return -EUCLEAN;
 	}
 	if (btrfs_file_extent_encryption(leaf, fi)) {
-		file_extent_err(root, leaf, slot,
+		file_extent_err(fs_info, leaf, slot,
 			"invalid encryption for file extent, have %u expect 0",
 			btrfs_file_extent_encryption(leaf, fi));
 		return -EUCLEAN;
@@ -157,7 +159,7 @@ static int check_extent_data_item(struct btrfs_root *root,
 	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
 		/* Inline extent must have 0 as key offset */
 		if (key->offset) {
-			file_extent_err(root, leaf, slot,
+			file_extent_err(fs_info, leaf, slot,
 		"invalid file_offset for inline file extent, have %llu expect 0",
 				key->offset);
 			return -EUCLEAN;
@@ -171,7 +173,7 @@ static int check_extent_data_item(struct btrfs_root *root,
 		/* Uncompressed inline extent size must match item size */
 		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
 		    btrfs_file_extent_ram_bytes(leaf, fi)) {
-			file_extent_err(root, leaf, slot,
+			file_extent_err(fs_info, leaf, slot,
 	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
 				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
 				btrfs_file_extent_ram_bytes(leaf, fi));
@@ -182,40 +184,41 @@ static int check_extent_data_item(struct btrfs_root *root,
 
 	/* Regular or preallocated extent has fixed item size */
 	if (item_size != sizeof(*fi)) {
-		file_extent_err(root, leaf, slot,
+		file_extent_err(fs_info, leaf, slot,
 	"invalid item size for reg/prealloc file extent, have %u expect %zu",
 			item_size, sizeof(*fi));
 		return -EUCLEAN;
 	}
-	if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) ||
-	    CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) ||
-	    CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) ||
-	    CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) ||
-	    CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize))
+	if (CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, ram_bytes, sectorsize) ||
+	    CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_bytenr, sectorsize) ||
+	    CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, disk_num_bytes, sectorsize) ||
+	    CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, offset, sectorsize) ||
+	    CHECK_FE_ALIGNED(fs_info, leaf, slot, fi, num_bytes, sectorsize))
 		return -EUCLEAN;
 	return 0;
 }
 
-static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
-			   struct btrfs_key *key, int slot)
+static int check_csum_item(struct btrfs_fs_info *fs_info,
+			   struct extent_buffer *leaf, struct btrfs_key *key,
+			   int slot)
 {
-	u32 sectorsize = root->fs_info->sectorsize;
-	u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy);
+	u32 sectorsize = fs_info->sectorsize;
+	u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
 
 	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
-		generic_err(root, leaf, slot,
+		generic_err(fs_info, leaf, slot,
 		"invalid key objectid for csum item, have %llu expect %llu",
 			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
 		return -EUCLEAN;
 	}
 	if (!IS_ALIGNED(key->offset, sectorsize)) {
-		generic_err(root, leaf, slot,
+		generic_err(fs_info, leaf, slot,
 	"unaligned key offset for csum item, have %llu should be aligned to %u",
 			key->offset, sectorsize);
 		return -EUCLEAN;
 	}
 	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
-		generic_err(root, leaf, slot,
+		generic_err(fs_info, leaf, slot,
 	"unaligned item size for csum item, have %u should be aligned to %u",
 			btrfs_item_size_nr(leaf, slot), csumsize);
 		return -EUCLEAN;
@@ -228,7 +231,8 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
  * which represents inode number
  */
 __printf(4, 5)
-static void dir_item_err(const struct btrfs_root *root,
+__cold
+static void dir_item_err(const struct btrfs_fs_info *fs_info,
 			 const struct extent_buffer *eb, int slot,
 			 const char *fmt, ...)
 {
@@ -242,14 +246,15 @@ static void dir_item_err(const struct btrfs_root *root,
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	btrfs_crit(root->fs_info,
+	btrfs_crit(fs_info,
 	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
-		btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
-		btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
+		btrfs_header_level(eb) == 0 ? "leaf" : "node",
+		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+		key.objectid, &vaf);
 	va_end(args);
 }
 
-static int check_dir_item(struct btrfs_root *root,
+static int check_dir_item(struct btrfs_fs_info *fs_info,
 			  struct extent_buffer *leaf,
 			  struct btrfs_key *key, int slot)
 {
@@ -268,7 +273,7 @@ static int check_dir_item(struct btrfs_root *root,
 
 		/* header itself should not cross item boundary */
 		if (cur + sizeof(*di) > item_size) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 		"dir item header crosses item boundary, have %zu boundary %u",
 				cur + sizeof(*di), item_size);
 			return -EUCLEAN;
@@ -277,7 +282,7 @@ static int check_dir_item(struct btrfs_root *root,
 		/* dir type check */
 		dir_type = btrfs_dir_type(leaf, di);
 		if (dir_type >= BTRFS_FT_MAX) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 			"invalid dir item type, have %u expect [0, %u)",
 				dir_type, BTRFS_FT_MAX);
 			return -EUCLEAN;
@@ -285,14 +290,14 @@ static int check_dir_item(struct btrfs_root *root,
 
 		if (key->type == BTRFS_XATTR_ITEM_KEY &&
 		    dir_type != BTRFS_FT_XATTR) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 		"invalid dir item type for XATTR key, have %u expect %u",
 				dir_type, BTRFS_FT_XATTR);
 			return -EUCLEAN;
 		}
 		if (dir_type == BTRFS_FT_XATTR &&
 		    key->type != BTRFS_XATTR_ITEM_KEY) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 			"xattr dir type found for non-XATTR key");
 			return -EUCLEAN;
 		}
@@ -305,21 +310,21 @@ static int check_dir_item(struct btrfs_root *root,
 		name_len = btrfs_dir_name_len(leaf, di);
 		data_len = btrfs_dir_data_len(leaf, di);
 		if (name_len > max_name_len) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 			"dir item name len too long, have %u max %u",
 				name_len, max_name_len);
 			return -EUCLEAN;
 		}
-		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
-			dir_item_err(root, leaf, slot,
+		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
+			dir_item_err(fs_info, leaf, slot,
 			"dir item name and data len too long, have %u max %u",
 				name_len + data_len,
-				BTRFS_MAX_XATTR_SIZE(root->fs_info));
+				BTRFS_MAX_XATTR_SIZE(fs_info));
 			return -EUCLEAN;
 		}
 
 		if (data_len && dir_type != BTRFS_FT_XATTR) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 			"dir item with invalid data len, have %u expect 0",
 				data_len);
 			return -EUCLEAN;
@@ -329,7 +334,7 @@ static int check_dir_item(struct btrfs_root *root,
 
 		/* header and name/data should not cross item boundary */
 		if (cur + total_size > item_size) {
-			dir_item_err(root, leaf, slot,
+			dir_item_err(fs_info, leaf, slot,
 		"dir item data crosses item boundary, have %u boundary %u",
 				cur + total_size, item_size);
 			return -EUCLEAN;
@@ -347,7 +352,7 @@ static int check_dir_item(struct btrfs_root *root,
 					(unsigned long)(di + 1), name_len);
 			name_hash = btrfs_name_hash(namebuf, name_len);
 			if (key->offset != name_hash) {
-				dir_item_err(root, leaf, slot,
+				dir_item_err(fs_info, leaf, slot,
 		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
 					name_hash, key->offset);
 				return -EUCLEAN;
@@ -362,7 +367,7 @@ static int check_dir_item(struct btrfs_root *root,
 /*
  * Common point to switch the item-specific validation.
  */
-static int check_leaf_item(struct btrfs_root *root,
+static int check_leaf_item(struct btrfs_fs_info *fs_info,
 			   struct extent_buffer *leaf,
 			   struct btrfs_key *key, int slot)
 {
@@ -370,24 +375,23 @@ static int check_leaf_item(struct btrfs_root *root,
 
 	switch (key->type) {
 	case BTRFS_EXTENT_DATA_KEY:
-		ret = check_extent_data_item(root, leaf, key, slot);
+		ret = check_extent_data_item(fs_info, leaf, key, slot);
 		break;
 	case BTRFS_EXTENT_CSUM_KEY:
-		ret = check_csum_item(root, leaf, key, slot);
+		ret = check_csum_item(fs_info, leaf, key, slot);
 		break;
 	case BTRFS_DIR_ITEM_KEY:
 	case BTRFS_DIR_INDEX_KEY:
 	case BTRFS_XATTR_ITEM_KEY:
-		ret = check_dir_item(root, leaf, key, slot);
+		ret = check_dir_item(fs_info, leaf, key, slot);
 		break;
 	}
 	return ret;
 }
 
-static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
+static int check_leaf(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf,
 		      bool check_item_data)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	/* No valid key type is 0, so all key should be larger than this key */
 	struct btrfs_key prev_key = {0, 0, 0};
 	struct btrfs_key key;
@@ -420,7 +424,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 			eb = btrfs_root_node(check_root);
 			/* if leaf is the root, then it's fine */
 			if (leaf != eb) {
-				generic_err(check_root, leaf, 0,
+				generic_err(fs_info, leaf, 0,
 		"invalid nritems, have %u should not be 0 for non-root leaf",
 					nritems);
 				free_extent_buffer(eb);
@@ -453,7 +457,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 
 		/* Make sure the keys are in the right order */
 		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
-			generic_err(root, leaf, slot,
+			generic_err(fs_info, leaf, slot,
 	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
 				prev_key.objectid, prev_key.type,
 				prev_key.offset, key.objectid, key.type,
@@ -472,7 +476,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 			item_end_expected = btrfs_item_offset_nr(leaf,
 								 slot - 1);
 		if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
-			generic_err(root, leaf, slot,
+			generic_err(fs_info, leaf, slot,
 				"unexpected item end, have %u expect %u",
 				btrfs_item_end_nr(leaf, slot),
 				item_end_expected);
@@ -486,7 +490,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 		 */
 		if (btrfs_item_end_nr(leaf, slot) >
 		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
-			generic_err(root, leaf, slot,
+			generic_err(fs_info, leaf, slot,
 			"slot end outside of leaf, have %u expect range [0, %u]",
 				btrfs_item_end_nr(leaf, slot),
 				BTRFS_LEAF_DATA_SIZE(fs_info));
@@ -496,7 +500,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 		/* Also check if the item pointer overlaps with btrfs item. */
 		if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
 		    btrfs_item_ptr_offset(leaf, slot)) {
-			generic_err(root, leaf, slot,
+			generic_err(fs_info, leaf, slot,
 		"slot overlaps with its data, item end %lu data start %lu",
 				btrfs_item_nr_offset(slot) +
 				sizeof(struct btrfs_item),
@@ -509,7 +513,7 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 			 * Check if the item size and content meet other
 			 * criteria
 			 */
-			ret = check_leaf_item(root, leaf, &key, slot);
+			ret = check_leaf_item(fs_info, leaf, &key, slot);
 			if (ret < 0)
 				return ret;
 		}
@@ -522,18 +526,19 @@ static int check_leaf(struct btrfs_root *root, struct extent_buffer *leaf,
 	return 0;
 }
 
-int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf)
+int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *leaf)
 {
-	return check_leaf(root, leaf, true);
+	return check_leaf(fs_info, leaf, true);
 }
 
-int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
 			     struct extent_buffer *leaf)
 {
-	return check_leaf(root, leaf, false);
+	return check_leaf(fs_info, leaf, false);
 }
 
-int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
+int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node)
 {
 	unsigned long nr = btrfs_header_nritems(node);
 	struct btrfs_key key, next_key;
@@ -541,12 +546,12 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
 	u64 bytenr;
 	int ret = 0;
 
-	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) {
-		btrfs_crit(root->fs_info,
+	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) {
+		btrfs_crit(fs_info,
 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
-			   root->objectid, node->start,
+			   btrfs_header_owner(node), node->start,
 			   nr == 0 ? "small" : "large", nr,
-			   BTRFS_NODEPTRS_PER_BLOCK(root->fs_info));
+			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
 		return -EUCLEAN;
 	}
 
@@ -556,21 +561,21 @@ int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node)
 		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
 
 		if (!bytenr) {
-			generic_err(root, node, slot,
+			generic_err(fs_info, node, slot,
 				"invalid NULL node pointer");
 			ret = -EUCLEAN;
 			goto out;
 		}
-		if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) {
-			generic_err(root, node, slot,
+		if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
+			generic_err(fs_info, node, slot,
 			"unaligned pointer, have %llu should be aligned to %u",
-				bytenr, root->fs_info->sectorsize);
+				bytenr, fs_info->sectorsize);
 			ret = -EUCLEAN;
 			goto out;
 		}
 
 		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
-			generic_err(root, node, slot,
+			generic_err(fs_info, node, slot,
 	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
 				key.objectid, key.type, key.offset,
 				next_key.objectid, next_key.type,
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h
index 3d53e8d6fda0..aba542755710 100644
--- a/fs/btrfs/tree-checker.h
+++ b/fs/btrfs/tree-checker.h
@@ -25,14 +25,15 @@
  * Will check not only the item pointers, but also every possible member
  * in item data.
  */
-int btrfs_check_leaf_full(struct btrfs_root *root, struct extent_buffer *leaf);
+int btrfs_check_leaf_full(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *leaf);
 
 /*
  * Less strict leaf checker.
  * Will only check item pointers, not reading item data.
  */
-int btrfs_check_leaf_relaxed(struct btrfs_root *root,
+int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info,
 			     struct extent_buffer *leaf);
-int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node);
+int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node);
 
 #endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index cb65089127cc..c09dbe4bd6e7 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -39,7 +39,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 	int level;
 	int next_key_ret = 0;
 	u64 last_ret = 0;
-	u64 min_trans = 0;
 
 	if (root->fs_info->extent_root == root) {
 		/*
@@ -81,7 +80,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 
 	path->keep_locks = 1;
 
-	ret = btrfs_search_forward(root, &key, path, min_trans);
+	ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
 	if (ret < 0)
 		goto out;
 	if (ret > 0) {
@@ -130,7 +129,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 	 */
 	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
 	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
-					   min_trans);
+					   BTRFS_OLDEST_GENERATION);
 	if (next_key_ret == 0) {
 		memcpy(&root->defrag_progress, &key, sizeof(key));
 		ret = -EAGAIN;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 434457794c27..c91babc6aa4b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -21,12 +21,12 @@
 #include <linux/blkdev.h>
 #include <linux/list_sort.h>
 #include <linux/iversion.h>
+#include "ctree.h"
 #include "tree-log.h"
 #include "disk-io.h"
 #include "locking.h"
 #include "print-tree.h"
 #include "backref.h"
-#include "hash.h"
 #include "compression.h"
 #include "qgroup.h"
 #include "inode-map.h"
@@ -286,7 +286,7 @@ struct walk_control {
 	 * inside it
 	 */
 	int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
-			    struct walk_control *wc, u64 gen);
+			    struct walk_control *wc, u64 gen, int level);
 };
 
 /*
@@ -294,7 +294,7 @@ struct walk_control {
  */
 static int process_one_buffer(struct btrfs_root *log,
 			      struct extent_buffer *eb,
-			      struct walk_control *wc, u64 gen)
+			      struct walk_control *wc, u64 gen, int level)
 {
 	struct btrfs_fs_info *fs_info = log->fs_info;
 	int ret = 0;
@@ -304,7 +304,7 @@ static int process_one_buffer(struct btrfs_root *log,
 	 * pin down any logged extents, so we have to read the block.
 	 */
 	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
-		ret = btrfs_read_buffer(eb, gen);
+		ret = btrfs_read_buffer(eb, gen, level, NULL);
 		if (ret)
 			return ret;
 	}
@@ -853,7 +853,6 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 				      struct btrfs_inode *dir,
 				      struct btrfs_dir_item *di)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct inode *inode;
 	char *name;
 	int name_len;
@@ -887,7 +886,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 	if (ret)
 		goto out;
 	else
-		ret = btrfs_run_delayed_items(trans, fs_info);
+		ret = btrfs_run_delayed_items(trans);
 out:
 	kfree(name);
 	iput(inode);
@@ -1007,7 +1006,6 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
 				  u64 ref_index, char *name, int namelen,
 				  int *search_done)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 	char *victim_name;
 	int victim_name_len;
@@ -1065,7 +1063,7 @@ again:
 				kfree(victim_name);
 				if (ret)
 					return ret;
-				ret = btrfs_run_delayed_items(trans, fs_info);
+				ret = btrfs_run_delayed_items(trans);
 				if (ret)
 					return ret;
 				*search_done = 1;
@@ -1136,8 +1134,7 @@ again:
 							victim_name_len);
 					if (!ret)
 						ret = btrfs_run_delayed_items(
-								  trans,
-								  fs_info);
+								  trans);
 				}
 				iput(victim_parent);
 				kfree(victim_name);
@@ -2098,7 +2095,6 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
 				      struct inode *dir,
 				      struct btrfs_key *dir_key)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 	struct extent_buffer *eb;
 	int slot;
@@ -2162,7 +2158,7 @@ again:
 			ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
 					BTRFS_I(inode), name, name_len);
 			if (!ret)
-				ret = btrfs_run_delayed_items(trans, fs_info);
+				ret = btrfs_run_delayed_items(trans);
 			kfree(name);
 			iput(inode);
 			if (ret)
@@ -2410,17 +2406,16 @@ out:
  * back refs).
  */
 static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
-			     struct walk_control *wc, u64 gen)
+			     struct walk_control *wc, u64 gen, int level)
 {
 	int nritems;
 	struct btrfs_path *path;
 	struct btrfs_root *root = wc->replay_dest;
 	struct btrfs_key key;
-	int level;
 	int i;
 	int ret;
 
-	ret = btrfs_read_buffer(eb, gen);
+	ret = btrfs_read_buffer(eb, gen, level, NULL);
 	if (ret)
 		return ret;
 
@@ -2537,6 +2532,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 	WARN_ON(*level >= BTRFS_MAX_LEVEL);
 
 	while (*level > 0) {
+		struct btrfs_key first_key;
+
 		WARN_ON(*level < 0);
 		WARN_ON(*level >= BTRFS_MAX_LEVEL);
 		cur = path->nodes[*level];
@@ -2549,6 +2546,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
 		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
 		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+		btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
 		blocksize = fs_info->nodesize;
 
 		parent = path->nodes[*level];
@@ -2559,7 +2557,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 			return PTR_ERR(next);
 
 		if (*level == 1) {
-			ret = wc->process_func(root, next, wc, ptr_gen);
+			ret = wc->process_func(root, next, wc, ptr_gen,
+					       *level - 1);
 			if (ret) {
 				free_extent_buffer(next);
 				return ret;
@@ -2567,7 +2566,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
 			path->slots[*level]++;
 			if (wc->free) {
-				ret = btrfs_read_buffer(next, ptr_gen);
+				ret = btrfs_read_buffer(next, ptr_gen,
+							*level - 1, &first_key);
 				if (ret) {
 					free_extent_buffer(next);
 					return ret;
@@ -2597,7 +2597,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 			free_extent_buffer(next);
 			continue;
 		}
-		ret = btrfs_read_buffer(next, ptr_gen);
+		ret = btrfs_read_buffer(next, ptr_gen, *level - 1, &first_key);
 		if (ret) {
 			free_extent_buffer(next);
 			return ret;
@@ -2647,7 +2647,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 
 			root_owner = btrfs_header_owner(parent);
 			ret = wc->process_func(root, path->nodes[*level], wc,
-				 btrfs_header_generation(path->nodes[*level]));
+				 btrfs_header_generation(path->nodes[*level]),
+				 *level);
 			if (ret)
 				return ret;
 
@@ -2729,7 +2730,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 	/* was the root node processed? if not, catch it here */
 	if (path->nodes[orig_level]) {
 		ret = wc->process_func(log, path->nodes[orig_level], wc,
-			 btrfs_header_generation(path->nodes[orig_level]));
+			 btrfs_header_generation(path->nodes[orig_level]),
+			 orig_level);
 		if (ret)
 			goto out;
 		if (wc->free) {
@@ -3972,6 +3974,7 @@ fill_holes:
 			ASSERT(ret == 0);
 			src = src_path->nodes[0];
 			i = 0;
+			need_find_last_extent = true;
 		}
 
 		btrfs_item_key_to_cpu(src, &key, i);
@@ -4006,6 +4009,36 @@ fill_holes:
 			break;
 		*last_extent = extent_end;
 	}
+
+	/*
+	 * Check if there is a hole between the last extent found in our leaf
+	 * and the first extent in the next leaf. If there is one, we need to
+	 * log an explicit hole so that at replay time we can punch the hole.
+	 */
+	if (ret == 0 &&
+	    key.objectid == btrfs_ino(inode) &&
+	    key.type == BTRFS_EXTENT_DATA_KEY &&
+	    i == btrfs_header_nritems(src_path->nodes[0])) {
+		ret = btrfs_next_leaf(inode->root, src_path);
+		need_find_last_extent = true;
+		if (ret > 0) {
+			ret = 0;
+		} else if (ret == 0) {
+			btrfs_item_key_to_cpu(src_path->nodes[0], &key,
+					      src_path->slots[0]);
+			if (key.objectid == btrfs_ino(inode) &&
+			    key.type == BTRFS_EXTENT_DATA_KEY &&
+			    *last_extent < key.offset) {
+				const u64 len = key.offset - *last_extent;
+
+				ret = btrfs_insert_file_extent(trans, log,
+							       btrfs_ino(inode),
+							       *last_extent, 0,
+							       0, len, 0, len,
+							       0, 0, 0);
+			}
+		}
+	}
 	/*
 	 * Need to let the callers know we dropped the path so they should
 	 * re-search.
@@ -5517,7 +5550,6 @@ out:
  * the last committed transaction
  */
 static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
-				  struct btrfs_root *root,
 				  struct btrfs_inode *inode,
 				  struct dentry *parent,
 				  const loff_t start,
@@ -5525,6 +5557,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 				  int inode_only,
 				  struct btrfs_log_ctx *ctx)
 {
+	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct super_block *sb;
 	struct dentry *old_parent = NULL;
@@ -5550,7 +5583,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		goto end_no_trans;
 	}
 
-	if (root != inode->root || btrfs_root_refs(&root->root_item) == 0) {
+	if (btrfs_root_refs(&root->root_item) == 0) {
 		ret = 1;
 		goto end_no_trans;
 	}
@@ -5682,7 +5715,7 @@ end_no_trans:
  * data on disk.
  */
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, struct dentry *dentry,
+			  struct dentry *dentry,
 			  const loff_t start,
 			  const loff_t end,
 			  struct btrfs_log_ctx *ctx)
@@ -5690,8 +5723,8 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
 	struct dentry *parent = dget_parent(dentry);
 	int ret;
 
-	ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
-			parent, start, end, LOG_INODE_ALL, ctx);
+	ret = btrfs_log_inode_parent(trans, BTRFS_I(d_inode(dentry)), parent,
+				     start, end, LOG_INODE_ALL, ctx);
 	dput(parent);
 
 	return ret;
@@ -5953,7 +5986,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct dentry *parent)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
 
 	/*
 	 * this will force the logging code to walk the dentry chain
@@ -5970,7 +6002,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	    (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
 		return 0;
 
-	return btrfs_log_inode_parent(trans, root, inode, parent, 0,
-				      LLONG_MAX, LOG_INODE_EXISTS, NULL);
+	return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
+				      LOG_INODE_EXISTS, NULL);
 }
 
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 483027f9a7f4..88abc43312a1 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -65,7 +65,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info);
 int btrfs_recover_log_trees(struct btrfs_root *tree_root);
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, struct dentry *dentry,
+			  struct dentry *dentry,
 			  const loff_t start,
 			  const loff_t end,
 			  struct btrfs_log_ctx *ctx);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 726f928238d0..9916f03430bc 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -282,7 +282,7 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
 	key.offset = 0;
 
 again_search_slot:
-	ret = btrfs_search_forward(root, &key, path, 0);
+	ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
 	if (ret) {
 		if (ret > 0)
 			ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b2d05c6b1c56..93f8f17cacca 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -27,6 +27,7 @@
 #include <linux/raid/pq.h>
 #include <linux/semaphore.h>
 #include <linux/uuid.h>
+#include <linux/list_sort.h>
 #include <asm/div64.h>
 #include "ctree.h"
 #include "extent_map.h"
@@ -278,7 +279,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev,
 			&disk_to_dev(bdev->bd_disk)->kobj);
 }
 
-void btrfs_cleanup_fs_uuids(void)
+void __exit btrfs_cleanup_fs_uuids(void)
 {
 	struct btrfs_fs_devices *fs_devices;
 
@@ -708,7 +709,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
 		fs_devices->rw_devices++;
-		list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
 	}
 	brelse(bh);
 
@@ -895,7 +896,11 @@ error:
 	return ERR_PTR(-ENOMEM);
 }
 
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
+/*
+ * After we have read the system tree and know devids belonging to
+ * this filesystem, remove the device which does not belong there.
+ */
+void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
 {
 	struct btrfs_device *device, *next;
 	struct btrfs_device *latest_dev = NULL;
@@ -1103,6 +1108,20 @@ out:
 	return ret;
 }
 
+static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct btrfs_device *dev1, *dev2;
+
+	dev1 = list_entry(a, struct btrfs_device, dev_list);
+	dev2 = list_entry(b, struct btrfs_device, dev_list);
+
+	if (dev1->devid < dev2->devid)
+		return -1;
+	else if (dev1->devid > dev2->devid)
+		return 1;
+	return 0;
+}
+
 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       fmode_t flags, void *holder)
 {
@@ -1113,6 +1132,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		fs_devices->opened++;
 		ret = 0;
 	} else {
+		list_sort(NULL, &fs_devices->devices, devid_cmp);
 		ret = __btrfs_open_devices(fs_devices, flags, holder);
 	}
 	mutex_unlock(&uuid_mutex);
@@ -1916,12 +1936,12 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	mutex_lock(&uuid_mutex);
 
 	num_devices = fs_info->fs_devices->num_devices;
-	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
 		WARN_ON(num_devices < 1);
 		num_devices--;
 	}
-	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 
 	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
 	if (ret)
@@ -2047,7 +2067,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_fs_devices *fs_devices;
 
-	WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+	lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
 
 	/*
 	 * in case of fs with no seed, srcdev->fs_devices will point
@@ -2237,7 +2257,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
 	struct btrfs_device *device;
 	u64 super_flags;
 
-	BUG_ON(!mutex_is_locked(&uuid_mutex));
+	lockdep_assert_held(&uuid_mutex);
 	if (!fs_devices->seeding)
 		return -EINVAL;
 
@@ -2642,7 +2662,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	device->total_bytes = btrfs_device_get_total_bytes(srcdev);
 	device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
 	device->bytes_used = btrfs_device_get_bytes_used(srcdev);
-	ASSERT(list_empty(&srcdev->resized_list));
 	device->commit_total_bytes = srcdev->commit_total_bytes;
 	device->commit_bytes_used = device->bytes_used;
 	device->fs_info = fs_info;
@@ -2666,19 +2685,6 @@ error:
 	return ret;
 }
 
-void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
-					      struct btrfs_device *tgtdev)
-{
-	u32 sectorsize = fs_info->sectorsize;
-
-	WARN_ON(fs_info->fs_devices->rw_devices == 0);
-	tgtdev->io_width = sectorsize;
-	tgtdev->io_align = sectorsize;
-	tgtdev->sector_size = sectorsize;
-	tgtdev->fs_info = fs_info;
-	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &tgtdev->dev_state);
-}
-
 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
 					struct btrfs_device *device)
 {
@@ -2984,7 +2990,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	 * we release the path used to search the chunk/dev tree and before
 	 * the current task acquires this mutex and calls us.
 	 */
-	ASSERT(mutex_is_locked(&fs_info->delete_unused_bgs_mutex));
+	lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
 
 	ret = btrfs_can_relocate(fs_info, chunk_offset);
 	if (ret)
@@ -2997,6 +3003,16 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	if (ret)
 		return ret;
 
+	/*
+	 * We add the kobjects here (and after forcing data chunk creation)
+	 * since relocation is the only place we'll create chunks of a new
+	 * type at runtime.  The only place where we'll remove the last
+	 * chunk of a type is the call immediately below this one.  Even
+	 * so, we're protected against races with the cleaner thread since
+	 * we're covered by the delete_unused_bgs_mutex.
+	 */
+	btrfs_add_raid_kobjects(fs_info);
+
 	trans = btrfs_start_trans_remove_block_group(root->fs_info,
 						     chunk_offset);
 	if (IS_ERR(trans)) {
@@ -3124,6 +3140,8 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
 			if (ret < 0)
 				return ret;
 
+			btrfs_add_raid_kobjects(fs_info);
+
 			return 1;
 		}
 	}
@@ -3892,12 +3910,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 	}
 
 	num_devices = fs_info->fs_devices->num_devices;
-	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
 		BUG_ON(num_devices < 1);
 		num_devices--;
 	}
-	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
 	if (num_devices > 1)
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
@@ -4202,7 +4220,8 @@ static int btrfs_uuid_scan_kthread(void *data)
 	key.offset = 0;
 
 	while (1) {
-		ret = btrfs_search_forward(root, &key, path, 0);
+		ret = btrfs_search_forward(root, &key, path,
+				BTRFS_OLDEST_GENERATION);
 		if (ret) {
 			if (ret > 0)
 				ret = 0;
@@ -4672,7 +4691,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
 	btrfs_set_fs_incompat(info, RAID56);
 }
 
-#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r->fs_info)		\
+#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)	\
 			- sizeof(struct btrfs_chunk))		\
 			/ sizeof(struct btrfs_stripe) + 1)
 
@@ -4713,10 +4732,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	BUG_ON(!alloc_profile_is_valid(type, 0));
 
-	if (list_empty(&fs_devices->alloc_list))
+	if (list_empty(&fs_devices->alloc_list)) {
+		if (btrfs_test_opt(info, ENOSPC_DEBUG))
+			btrfs_debug(info, "%s: no writable device", __func__);
 		return -ENOSPC;
+	}
 
-	index = __get_raid_index(type);
+	index = btrfs_bg_flags_to_raid_index(type);
 
 	sub_stripes = btrfs_raid_array[index].sub_stripes;
 	dev_stripes = btrfs_raid_array[index].dev_stripes;
@@ -4729,7 +4751,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		max_stripe_size = SZ_1G;
 		max_chunk_size = 10 * max_stripe_size;
 		if (!devs_max)
-			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
+			devs_max = BTRFS_MAX_DEVS(info);
 	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
 		/* for larger filesystems, use larger metadata chunks */
 		if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
@@ -4738,7 +4760,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			max_stripe_size = SZ_256M;
 		max_chunk_size = max_stripe_size;
 		if (!devs_max)
-			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
+			devs_max = BTRFS_MAX_DEVS(info);
 	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
 		max_stripe_size = SZ_32M;
 		max_chunk_size = 2 * max_stripe_size;
@@ -4797,8 +4819,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		if (ret == 0)
 			max_avail = max_stripe_size * dev_stripes;
 
-		if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
+		if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) {
+			if (btrfs_test_opt(info, ENOSPC_DEBUG))
+				btrfs_debug(info,
+			"%s: devid %llu has no free space, have=%llu want=%u",
+					    __func__, device->devid, max_avail,
+					    BTRFS_STRIPE_LEN * dev_stripes);
 			continue;
+		}
 
 		if (ndevs == fs_devices->rw_devices) {
 			WARN(1, "%s: found more than %llu devices\n",
@@ -4821,8 +4849,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	/* round down to number of usable stripes */
 	ndevs = round_down(ndevs, devs_increment);
 
-	if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
+	if (ndevs < devs_min) {
 		ret = -ENOSPC;
+		if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
+			btrfs_debug(info,
+	"%s: not enough devices with free space: have=%d minimum required=%d",
+				    __func__, ndevs, devs_min);
+		}
 		goto error;
 	}
 
@@ -4856,18 +4889,17 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	 * and compare that answer with the max chunk size
 	 */
 	if (stripe_size * data_stripes > max_chunk_size) {
-		u64 mask = (1ULL << 24) - 1;
-
 		stripe_size = div_u64(max_chunk_size, data_stripes);
 
 		/* bump the answer up to a 16MB boundary */
-		stripe_size = (stripe_size + mask) & ~mask;
+		stripe_size = round_up(stripe_size, SZ_16M);
 
-		/* but don't go higher than the limits we found
-		 * while searching for free extents
+		/*
+		 * But don't go higher than the limits we found while searching
+		 * for free extents
 		 */
-		if (stripe_size > devices_info[ndevs-1].max_avail)
-			stripe_size = devices_info[ndevs-1].max_avail;
+		stripe_size = min(devices_info[ndevs - 1].max_avail,
+				  stripe_size);
 	}
 
 	/* align to BTRFS_STRIPE_LEN */
@@ -5068,7 +5100,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 {
 	u64 chunk_offset;
 
-	ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
+	lockdep_assert_held(&fs_info->chunk_mutex);
 	chunk_offset = find_next_chunk(fs_info);
 	return __btrfs_alloc_chunk(trans, chunk_offset, type);
 }
@@ -5209,11 +5241,11 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 		ret = 1;
 	free_extent_map(em);
 
-	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
 	    fs_info->dev_replace.tgtdev)
 		ret++;
-	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+	btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
 
 	return ret;
 }
@@ -5254,13 +5286,25 @@ int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 }
 
 static int find_live_mirror(struct btrfs_fs_info *fs_info,
-			    struct map_lookup *map, int first, int num,
-			    int optimal, int dev_replace_is_ongoing)
+			    struct map_lookup *map, int first,
+			    int dev_replace_is_ongoing)
 {
 	int i;
+	int num_stripes;
+	int preferred_mirror;
 	int tolerance;
 	struct btrfs_device *srcdev;
 
+	ASSERT((map->type &
+		 (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
+
+	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		num_stripes = map->sub_stripes;
+	else
+		num_stripes = map->num_stripes;
+
+	preferred_mirror = first + current->pid % num_stripes;
+
 	if (dev_replace_is_ongoing &&
 	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
 	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
@@ -5274,10 +5318,10 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	 * mirror is available
 	 */
 	for (tolerance = 0; tolerance < 2; tolerance++) {
-		if (map->stripes[optimal].dev->bdev &&
-		    (tolerance || map->stripes[optimal].dev != srcdev))
-			return optimal;
-		for (i = first; i < first + num; i++) {
+		if (map->stripes[preferred_mirror].dev->bdev &&
+		    (tolerance || map->stripes[preferred_mirror].dev != srcdev))
+			return preferred_mirror;
+		for (i = first; i < first + num_stripes; i++) {
 			if (map->stripes[i].dev->bdev &&
 			    (tolerance || map->stripes[i].dev != srcdev))
 				return i;
@@ -5287,7 +5331,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
 	/* we couldn't find one that doesn't fail.  Just return something
 	 * and the io error handling code will clean up eventually
 	 */
-	return optimal;
+	return preferred_mirror;
 }
 
 static inline int parity_smaller(u64 a, u64 b)
@@ -5779,10 +5823,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	if (!bbio_ret)
 		goto out;
 
-	btrfs_dev_replace_lock(dev_replace, 0);
+	btrfs_dev_replace_read_lock(dev_replace);
 	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
 	if (!dev_replace_is_ongoing)
-		btrfs_dev_replace_unlock(dev_replace, 0);
+		btrfs_dev_replace_read_unlock(dev_replace);
 	else
 		btrfs_dev_replace_set_lock_blocking(dev_replace);
 
@@ -5814,8 +5858,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			stripe_index = mirror_num - 1;
 		else {
 			stripe_index = find_live_mirror(fs_info, map, 0,
-					    map->num_stripes,
-					    current->pid % map->num_stripes,
 					    dev_replace_is_ongoing);
 			mirror_num = stripe_index + 1;
 		}
@@ -5843,8 +5885,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 			int old_stripe_index = stripe_index;
 			stripe_index = find_live_mirror(fs_info, map,
 					      stripe_index,
-					      map->sub_stripes, stripe_index +
-					      current->pid % map->sub_stripes,
 					      dev_replace_is_ongoing);
 			mirror_num = stripe_index - old_stripe_index + 1;
 		}
@@ -5984,7 +6024,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 out:
 	if (dev_replace_is_ongoing) {
 		btrfs_dev_replace_clear_lock_blocking(dev_replace);
-		btrfs_dev_replace_unlock(dev_replace, 0);
+		btrfs_dev_replace_read_unlock(dev_replace);
 	}
 	free_extent_map(em);
 	return ret;
@@ -6618,7 +6658,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
 	struct btrfs_fs_devices *fs_devices;
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&uuid_mutex));
+	lockdep_assert_held(&uuid_mutex);
 	ASSERT(fsid);
 
 	fs_devices = fs_info->fs_devices->seed;
@@ -7358,20 +7398,20 @@ void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
 }
 
 /* Must be invoked during the transaction commit */
-void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
-					struct btrfs_transaction *transaction)
+void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct extent_map *em;
 	struct map_lookup *map;
 	struct btrfs_device *dev;
 	int i;
 
-	if (list_empty(&transaction->pending_chunks))
+	if (list_empty(&trans->pending_chunks))
 		return;
 
 	/* In order to kick the device replace finish process */
 	mutex_lock(&fs_info->chunk_mutex);
-	list_for_each_entry(em, &transaction->pending_chunks, list) {
+	list_for_each_entry(em, &trans->pending_chunks, list) {
 		map = em->map_lookup;
 
 		for (i = 0; i < map->num_stripes; i++) {
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 28c28eeadff3..d1fcaea9fef5 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -422,7 +422,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 			  struct btrfs_fs_devices **fs_devices_ret);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
 void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
 		struct btrfs_device *device, struct btrfs_device *this_dev);
 int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
@@ -436,7 +436,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u8 *uuid);
 int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 		    const char *device_path, u64 devid);
-void btrfs_cleanup_fs_uuids(void);
+void __exit btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
 		      struct btrfs_device *device, u64 new_size);
@@ -476,8 +476,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 				      struct btrfs_device *srcdev);
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 				      struct btrfs_device *tgtdev);
-void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
-					      struct btrfs_device *tgtdev);
 void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
 int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
 			   u64 logical, u64 len);
@@ -546,9 +544,30 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
 	btrfs_dev_stat_set(dev, index, 0);
 }
 
+/*
+ * Convert block group flags (BTRFS_BLOCK_GROUP_*) to btrfs_raid_types, which
+ * can be used as index to access btrfs_raid_array[].
+ */
+static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
+{
+	if (flags & BTRFS_BLOCK_GROUP_RAID10)
+		return BTRFS_RAID_RAID10;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+		return BTRFS_RAID_RAID1;
+	else if (flags & BTRFS_BLOCK_GROUP_DUP)
+		return BTRFS_RAID_DUP;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+		return BTRFS_RAID_RAID0;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+		return BTRFS_RAID_RAID5;
+	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+		return BTRFS_RAID_RAID6;
+
+	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
 void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
-void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
-					struct btrfs_transaction *transaction);
+void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
 
 struct list_head *btrfs_get_fs_uuids(void);
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index de7d072c78ef..e1e8177deb5e 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -33,7 +33,7 @@
 #include "locking.h"
 
 
-ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
+int btrfs_getxattr(struct inode *inode, const char *name,
 				void *buffer, size_t size)
 {
 	struct btrfs_dir_item *di;
@@ -233,7 +233,7 @@ out:
 /*
  * @value: "" makes the attribute to empty, NULL removes it
  */
-int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+int btrfs_setxattr(struct btrfs_trans_handle *trans,
 		     struct inode *inode, const char *name,
 		     const void *value, size_t size, int flags)
 {
@@ -374,7 +374,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
 				   const char *name, void *buffer, size_t size)
 {
 	name = xattr_full_name(handler, name);
-	return __btrfs_getxattr(inode, name, buffer, size);
+	return btrfs_getxattr(inode, name, buffer, size);
 }
 
 static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
@@ -383,7 +383,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
 				   size_t size, int flags)
 {
 	name = xattr_full_name(handler, name);
-	return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+	return btrfs_setxattr(NULL, inode, name, buffer, size, flags);
 }
 
 static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
@@ -448,8 +448,8 @@ static int btrfs_initxattrs(struct inode *inode,
 		}
 		strcpy(name, XATTR_SECURITY_PREFIX);
 		strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
-		err = __btrfs_setxattr(trans, inode, name,
-				       xattr->value, xattr->value_len, 0);
+		err = btrfs_setxattr(trans, inode, name, xattr->value,
+				xattr->value_len, 0);
 		kfree(name);
 		if (err < 0)
 			break;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 15fc4743dc70..e215a3212a2a 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -23,13 +23,14 @@
 
 extern const struct xattr_handler *btrfs_xattr_handlers[];
 
-extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
+int btrfs_getxattr(struct inode *inode, const char *name,
 		void *buffer, size_t size);
-extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+int btrfs_setxattr(struct btrfs_trans_handle *trans,
 			    struct inode *inode, const char *name,
 			    const void *value, size_t size, int flags);
+ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
-extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
+int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
 				     struct inode *inode, struct inode *dir,
 				     const struct qstr *qstr);
 
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 687da62daf4e..741749a98614 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -187,13 +187,13 @@ config CIFS_NFSD_EXPORT
 	   Allows NFS server to export a CIFS mounted share (nfsd over cifs)
 
 config CIFS_SMB311
-	bool "SMB3.1.1 network file system support (Experimental)"
+	bool "SMB3.1.1 network file system support"
 	depends on CIFS
+	select CRYPTO_SHA512
 
 	help
-	  This enables experimental support for the newest, SMB3.1.1, dialect.
-	  This dialect includes improved security negotiation features.
-	  If unsure, say N
+	  This enables support for the newest, and most secure dialect, SMB3.11.
+	  If unsure, say Y
 
 config CIFS_SMB_DIRECT
 	bool "SMB Direct support (Experimental)"
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index f2b0a7f124da..a6ef088e057b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -36,37 +36,6 @@
 #include <crypto/skcipher.h>
 #include <crypto/aead.h>
 
-static int
-cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
-{
-	int rc;
-	unsigned int size;
-
-	if (server->secmech.sdescmd5 != NULL)
-		return 0; /* already allocated */
-
-	server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
-	if (IS_ERR(server->secmech.md5)) {
-		cifs_dbg(VFS, "could not allocate crypto md5\n");
-		rc = PTR_ERR(server->secmech.md5);
-		server->secmech.md5 = NULL;
-		return rc;
-	}
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->secmech.md5);
-	server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->secmech.sdescmd5) {
-		crypto_free_shash(server->secmech.md5);
-		server->secmech.md5 = NULL;
-		return -ENOMEM;
-	}
-	server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
-	server->secmech.sdescmd5->shash.flags = 0x0;
-
-	return 0;
-}
-
 int __cifs_calc_signature(struct smb_rqst *rqst,
 			struct TCP_Server_Info *server, char *signature,
 			struct shash_desc *shash)
@@ -132,13 +101,10 @@ static int cifs_calc_signature(struct smb_rqst *rqst,
 	if (!rqst->rq_iov || !signature || !server)
 		return -EINVAL;
 
-	if (!server->secmech.sdescmd5) {
-		rc = cifs_crypto_shash_md5_allocate(server);
-		if (rc) {
-			cifs_dbg(VFS, "%s: Can't alloc md5 crypto\n", __func__);
-			return -1;
-		}
-	}
+	rc = cifs_alloc_hash("md5", &server->secmech.md5,
+			     &server->secmech.sdescmd5);
+	if (rc)
+		return -1;
 
 	rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
 	if (rc) {
@@ -663,37 +629,6 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
 	return rc;
 }
 
-static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
-{
-	int rc;
-	unsigned int size;
-
-	/* check if already allocated */
-	if (server->secmech.sdeschmacmd5)
-		return 0;
-
-	server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
-	if (IS_ERR(server->secmech.hmacmd5)) {
-		cifs_dbg(VFS, "could not allocate crypto hmacmd5\n");
-		rc = PTR_ERR(server->secmech.hmacmd5);
-		server->secmech.hmacmd5 = NULL;
-		return rc;
-	}
-
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->secmech.hmacmd5);
-	server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
-	if (!server->secmech.sdeschmacmd5) {
-		crypto_free_shash(server->secmech.hmacmd5);
-		server->secmech.hmacmd5 = NULL;
-		return -ENOMEM;
-	}
-	server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
-	server->secmech.sdeschmacmd5->shash.flags = 0x0;
-
-	return 0;
-}
-
 int
 setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
 {
@@ -757,9 +692,10 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
 
 	mutex_lock(&ses->server->srv_mutex);
 
-	rc = crypto_hmacmd5_alloc(ses->server);
+	rc = cifs_alloc_hash("hmac(md5)",
+			     &ses->server->secmech.hmacmd5,
+			     &ses->server->secmech.sdeschmacmd5);
 	if (rc) {
-		cifs_dbg(VFS, "could not crypto alloc hmacmd5 rc %d\n", rc);
 		goto unlock;
 	}
 
@@ -893,6 +829,11 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server)
 		server->secmech.md5 = NULL;
 	}
 
+	if (server->secmech.sha512) {
+		crypto_free_shash(server->secmech.sha512);
+		server->secmech.sha512 = NULL;
+	}
+
 	if (server->secmech.hmacmd5) {
 		crypto_free_shash(server->secmech.hmacmd5);
 		server->secmech.hmacmd5 = NULL;
@@ -916,4 +857,6 @@ cifs_crypto_secmech_release(struct TCP_Server_Info *server)
 	server->secmech.sdeschmacmd5 = NULL;
 	kfree(server->secmech.sdescmd5);
 	server->secmech.sdescmd5 = NULL;
+	kfree(server->secmech.sdescsha512);
+	server->secmech.sdescsha512 = NULL;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 32cdea67bbfd..f715609b13f3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1486,6 +1486,7 @@ MODULE_SOFTDEP("pre: nls");
 MODULE_SOFTDEP("pre: aes");
 MODULE_SOFTDEP("pre: cmac");
 MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: sha512");
 MODULE_SOFTDEP("pre: aead2");
 MODULE_SOFTDEP("pre: ccm");
 module_init(init_cifs)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 48f7c197cd2d..2282562e78a1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -130,10 +130,12 @@ struct cifs_secmech {
 	struct crypto_shash *md5; /* md5 hash function */
 	struct crypto_shash *hmacsha256; /* hmac-sha256 hash function */
 	struct crypto_shash *cmacaes; /* block-cipher based MAC function */
+	struct crypto_shash *sha512; /* sha512 hash function */
 	struct sdesc *sdeschmacmd5;  /* ctxt to generate ntlmv2 hash, CR1 */
 	struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
 	struct sdesc *sdeschmacsha256;  /* ctxt to generate smb2 signature */
 	struct sdesc *sdesccmacaes;  /* ctxt to generate smb3 signature */
+	struct sdesc *sdescsha512; /* ctxt to generate smb3.11 signing key */
 	struct crypto_aead *ccmaesencrypt; /* smb3 encryption aead */
 	struct crypto_aead *ccmaesdecrypt; /* smb3 decryption aead */
 };
@@ -466,6 +468,7 @@ struct smb_version_values {
 	__u32		exclusive_lock_type;
 	__u32		shared_lock_type;
 	__u32		unlock_lock_type;
+	size_t		header_preamble_size;
 	size_t		header_size;
 	size_t		max_header_size;
 	size_t		read_rsp_size;
@@ -673,7 +676,8 @@ struct TCP_Server_Info {
 	unsigned int	max_read;
 	unsigned int	max_write;
 #ifdef CONFIG_CIFS_SMB311
-	__u8	preauth_sha_hash[64]; /* save initital negprot hash */
+	 /* save initital negprot hash */
+	__u8	preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
 #endif /* 3.1.1 */
 	struct delayed_work reconnect; /* reconnect workqueue job */
 	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
@@ -862,7 +866,7 @@ struct cifs_ses {
 	__u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
 	__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
 #ifdef CONFIG_CIFS_SMB311
-	__u8 preauth_sha_hash[64];
+	__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
 #endif /* 3.1.1 */
 };
 
@@ -1466,6 +1470,7 @@ struct dfs_info3_param {
 #define CIFS_FATTR_NEED_REVAL		0x4
 #define CIFS_FATTR_INO_COLLISION	0x8
 #define CIFS_FATTR_UNKNOWN_NLINK	0x10
+#define CIFS_FATTR_FAKE_ROOT_INO	0x20
 
 struct cifs_fattr {
 	u32		cf_flags;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 93d565186698..365a414a75e9 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -542,4 +542,9 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
 struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
 void cifs_aio_ctx_release(struct kref *refcount);
 int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
+
+int cifs_alloc_hash(const char *name, struct crypto_shash **shash,
+		    struct sdesc **sdesc);
+void cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc);
+
 #endif			/* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 9ceebf30eb22..59c09a596c0a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1454,7 +1454,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	unsigned int data_offset, data_len;
 	struct cifs_readdata *rdata = mid->callback_data;
 	char *buf = server->smallbuf;
-	unsigned int buflen = get_rfc1002_length(buf) + 4;
+	unsigned int buflen = get_rfc1002_length(buf) +
+		server->vals->header_preamble_size;
 	bool use_rdma_mr = false;
 
 	cifs_dbg(FYI, "%s: mid=%llu offset=%llu bytes=%u\n",
@@ -1504,7 +1505,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		return cifs_readv_discard(server, mid);
 	}
 
-	data_offset = server->ops->read_data_offset(buf) + 4;
+	data_offset = server->ops->read_data_offset(buf) +
+		server->vals->header_preamble_size;
 	if (data_offset < server->total_read) {
 		/*
 		 * win2k8 sometimes sends an offset of 0 when the read
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a726f524fb84..4e0808f40195 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -775,7 +775,8 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	unsigned int pdu_length = get_rfc1002_length(buf);
 
 	/* make sure this will fit in a large buffer */
-	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) - 4) {
+	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server) -
+		server->vals->header_preamble_size) {
 		cifs_dbg(VFS, "SMB response too long (%u bytes)\n", pdu_length);
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
@@ -791,7 +792,9 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 
 	/* now read the rest */
 	length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
-				pdu_length - HEADER_SIZE(server) + 1 + 4);
+				       pdu_length - HEADER_SIZE(server) + 1
+				       + server->vals->header_preamble_size);
+
 	if (length < 0)
 		return length;
 	server->total_read += length;
@@ -884,7 +887,8 @@ cifs_demultiplex_thread(void *p)
 			continue;
 
 		/* make sure we have enough to get to the MID */
-		if (pdu_length < HEADER_SIZE(server) - 1 - 4) {
+		if (pdu_length < HEADER_SIZE(server) - 1 -
+		    server->vals->header_preamble_size) {
 			cifs_dbg(VFS, "SMB response too short (%u bytes)\n",
 				 pdu_length);
 			cifs_reconnect(server);
@@ -893,8 +897,10 @@ cifs_demultiplex_thread(void *p)
 		}
 
 		/* read down to the MID */
-		length = cifs_read_from_socket(server, buf + 4,
-					       HEADER_SIZE(server) - 1 - 4);
+		length = cifs_read_from_socket(server,
+			     buf + server->vals->header_preamble_size,
+			     HEADER_SIZE(server) - 1
+			     - server->vals->header_preamble_size);
 		if (length < 0)
 			continue;
 		server->total_read += length;
@@ -4306,7 +4312,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 		 server->sec_mode, server->capabilities, server->timeAdj);
 
 	if (ses->auth_key.response) {
-		cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+		cifs_dbg(FYI, "Free previous auth_key.response = %p\n",
 			 ses->auth_key.response);
 		kfree(ses->auth_key.response);
 		ses->auth_key.response = NULL;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8f9a8cc7cc62..f856df4adae3 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -707,6 +707,18 @@ cgfi_exit:
 	return rc;
 }
 
+/* Simple function to return a 64 bit hash of string.  Rarely called */
+static __u64 simple_hashstr(const char *str)
+{
+	const __u64 hash_mult =  1125899906842597L; /* a big enough prime */
+	__u64 hash = 0;
+
+	while (*str)
+		hash = (hash + (__u64) *str++) * hash_mult;
+
+	return hash;
+}
+
 int
 cifs_get_inode_info(struct inode **inode, const char *full_path,
 		    FILE_ALL_INFO *data, struct super_block *sb, int xid,
@@ -816,6 +828,14 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 						 tmprc);
 					fattr.cf_uniqueid = iunique(sb, ROOT_I);
 					cifs_autodisable_serverino(cifs_sb);
+				} else if ((fattr.cf_uniqueid == 0) &&
+						strlen(full_path) == 0) {
+					/* some servers ret bad root ino ie 0 */
+					cifs_dbg(FYI, "Invalid (0) inodenum\n");
+					fattr.cf_flags |=
+						CIFS_FATTR_FAKE_ROOT_INO;
+					fattr.cf_uniqueid =
+						simple_hashstr(tcon->treeName);
 				}
 			}
 		} else
@@ -832,6 +852,16 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 				&fattr.cf_uniqueid, data);
 			if (tmprc)
 				fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+			else if ((fattr.cf_uniqueid == 0) &&
+					strlen(full_path) == 0) {
+				/*
+				 * Reuse existing root inode num since
+				 * inum zero for root causes ls of . and .. to
+				 * not be returned
+				 */
+				cifs_dbg(FYI, "Srv ret 0 inode num for root\n");
+				fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+			}
 		} else
 			fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
 	}
@@ -893,6 +923,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 	}
 
 cgii_exit:
+	if ((*inode) && ((*inode)->i_ino == 0))
+		cifs_dbg(FYI, "inode number of zero returned\n");
+
 	kfree(buf);
 	cifs_put_tlink(tlink);
 	return rc;
@@ -1066,10 +1099,7 @@ iget_no_retry:
 
 out:
 	kfree(path);
-	/* can not call macro free_xid here since in a void func
-	 * TODO: This is no longer true
-	 */
-	_free_xid(xid);
+	free_xid(xid);
 	return inode;
 }
 
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 60b5a11ee11b..889a840172eb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -50,25 +50,12 @@ static int
 symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
 {
 	int rc;
-	unsigned int size;
-	struct crypto_shash *md5;
-	struct sdesc *sdescmd5;
-
-	md5 = crypto_alloc_shash("md5", 0, 0);
-	if (IS_ERR(md5)) {
-		rc = PTR_ERR(md5);
-		cifs_dbg(VFS, "%s: Crypto md5 allocation error %d\n",
-			 __func__, rc);
-		return rc;
-	}
-	size = sizeof(struct shash_desc) + crypto_shash_descsize(md5);
-	sdescmd5 = kmalloc(size, GFP_KERNEL);
-	if (!sdescmd5) {
-		rc = -ENOMEM;
+	struct crypto_shash *md5 = NULL;
+	struct sdesc *sdescmd5 = NULL;
+
+	rc = cifs_alloc_hash("md5", &md5, &sdescmd5);
+	if (rc)
 		goto symlink_hash_err;
-	}
-	sdescmd5->shash.tfm = md5;
-	sdescmd5->shash.flags = 0x0;
 
 	rc = crypto_shash_init(&sdescmd5->shash);
 	if (rc) {
@@ -85,9 +72,7 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
 		cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
 
 symlink_hash_err:
-	crypto_free_shash(md5);
-	kfree(sdescmd5);
-
+	cifs_free_hash(&md5, &sdescmd5);
 	return rc;
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a0dbced4a45c..460084a8eac5 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -848,3 +848,57 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len);
 	return 0;
 }
+
+/**
+ * cifs_alloc_hash - allocate hash and hash context together
+ *
+ * The caller has to make sure @sdesc is initialized to either NULL or
+ * a valid context. Both can be freed via cifs_free_hash().
+ */
+int
+cifs_alloc_hash(const char *name,
+		struct crypto_shash **shash, struct sdesc **sdesc)
+{
+	int rc = 0;
+	size_t size;
+
+	if (*sdesc != NULL)
+		return 0;
+
+	*shash = crypto_alloc_shash(name, 0, 0);
+	if (IS_ERR(*shash)) {
+		cifs_dbg(VFS, "could not allocate crypto %s\n", name);
+		rc = PTR_ERR(*shash);
+		*shash = NULL;
+		*sdesc = NULL;
+		return rc;
+	}
+
+	size = sizeof(struct shash_desc) + crypto_shash_descsize(*shash);
+	*sdesc = kmalloc(size, GFP_KERNEL);
+	if (*sdesc == NULL) {
+		cifs_dbg(VFS, "no memory left to allocate crypto %s\n", name);
+		crypto_free_shash(*shash);
+		*shash = NULL;
+		return -ENOMEM;
+	}
+
+	(*sdesc)->shash.tfm = *shash;
+	(*sdesc)->shash.flags = 0x0;
+	return 0;
+}
+
+/**
+ * cifs_free_hash - free hash and hash context together
+ *
+ * Freeing a NULL hash or context is safe.
+ */
+void
+cifs_free_hash(struct crypto_shash **shash, struct sdesc **sdesc)
+{
+	kfree(*sdesc);
+	*sdesc = NULL;
+	if (*shash)
+		crypto_free_shash(*shash);
+	*shash = NULL;
+}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3d495e440c87..aff8ce8ba34d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1122,6 +1122,7 @@ struct smb_version_values smb1_values = {
 	.exclusive_lock_type = 0,
 	.shared_lock_type = LOCKING_ANDX_SHARED_LOCK,
 	.unlock_lock_type = 0,
+	.header_preamble_size = 4,
 	.header_size = sizeof(struct smb_hdr),
 	.max_header_size = MAX_CIFS_HDR_SIZE,
 	.read_rsp_size = sizeof(READ_RSP),
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 62c88dfed57b..3bfc9c990724 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -745,7 +745,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
 	"STATUS_NOLOGON_SERVER_TRUST_ACCOUNT"},
 	{STATUS_DOMAIN_TRUST_INCONSISTENT, -EIO,
 	"STATUS_DOMAIN_TRUST_INCONSISTENT"},
-	{STATUS_FS_DRIVER_REQUIRED, -EIO, "STATUS_FS_DRIVER_REQUIRED"},
+	{STATUS_FS_DRIVER_REQUIRED, -EOPNOTSUPP, "STATUS_FS_DRIVER_REQUIRED"},
 	{STATUS_IMAGE_ALREADY_LOADED_AS_DLL, -EIO,
 	"STATUS_IMAGE_ALREADY_LOADED_AS_DLL"},
 	{STATUS_NETWORK_OPEN_RESTRICTION, -EIO,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 76d03abaa38c..5406e95f5d92 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -150,7 +150,8 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 		}
 		return 1;
 	}
-	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE - 4) {
+	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE -
+	    srvr->vals->header_preamble_size) {
 		cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n",
 			 mid);
 		return 1;
@@ -189,26 +190,26 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 		}
 	}
 
-	if (4 + len != length) {
-		cifs_dbg(VFS, "Total length %u RFC1002 length %u mismatch mid %llu\n",
-			 length, 4 + len, mid);
+	if (srvr->vals->header_preamble_size + len != length) {
+		cifs_dbg(VFS, "Total length %u RFC1002 length %zu mismatch mid %llu\n",
+			 length, srvr->vals->header_preamble_size + len, mid);
 		return 1;
 	}
 
 	clc_len = smb2_calc_size(hdr);
 
-	if (4 + len != clc_len) {
-		cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
-			 clc_len, 4 + len, mid);
+	if (srvr->vals->header_preamble_size + len != clc_len) {
+		cifs_dbg(FYI, "Calculated size %u length %zu mismatch mid %llu\n",
+			 clc_len, srvr->vals->header_preamble_size + len, mid);
 		/* create failed on symlink */
 		if (command == SMB2_CREATE_HE &&
 		    shdr->Status == STATUS_STOPPED_ON_SYMLINK)
 			return 0;
 		/* Windows 7 server returns 24 bytes more */
-		if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
+		if (clc_len + 24 - srvr->vals->header_preamble_size == len && command == SMB2_OPLOCK_BREAK_HE)
 			return 0;
 		/* server can return one byte more due to implied bcc[0] */
-		if (clc_len == 4 + len + 1)
+		if (clc_len == srvr->vals->header_preamble_size + len + 1)
 			return 0;
 
 		/*
@@ -218,10 +219,10 @@ smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
 		 * Log the server error (once), but allow it and continue
 		 * since the frame is parseable.
 		 */
-		if (clc_len < 4 /* RFC1001 header size */ + len) {
+		if (clc_len < srvr->vals->header_preamble_size /* RFC1001 header size */ + len) {
 			printk_once(KERN_WARNING
-				"SMB2 server sent bad RFC1001 len %d not %d\n",
-				len, clc_len - 4);
+				"SMB2 server sent bad RFC1001 len %d not %zu\n",
+				len, clc_len - srvr->vals->header_preamble_size);
 			return 0;
 		}
 
@@ -706,3 +707,67 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
 
 	return 0;
 }
+
+#ifdef CONFIG_CIFS_SMB311
+/**
+ * smb311_update_preauth_hash - update @ses hash with the packet data in @iov
+ *
+ * Assumes @iov does not contain the rfc1002 length and iov[0] has the
+ * SMB2 header.
+ */
+int
+smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
+{
+	int i, rc;
+	struct sdesc *d;
+	struct smb2_sync_hdr *hdr;
+
+	if (ses->server->tcpStatus == CifsGood) {
+		/* skip non smb311 connections */
+		if (ses->server->dialect != SMB311_PROT_ID)
+			return 0;
+
+		/* skip last sess setup response */
+		hdr = (struct smb2_sync_hdr *)iov[0].iov_base;
+		if (hdr->Flags & SMB2_FLAGS_SIGNED)
+			return 0;
+	}
+
+	rc = smb311_crypto_shash_allocate(ses->server);
+	if (rc)
+		return rc;
+
+	d = ses->server->secmech.sdescsha512;
+	rc = crypto_shash_init(&d->shash);
+	if (rc) {
+		cifs_dbg(VFS, "%s: could not init sha512 shash\n", __func__);
+		return rc;
+	}
+
+	rc = crypto_shash_update(&d->shash, ses->preauth_sha_hash,
+				 SMB2_PREAUTH_HASH_SIZE);
+	if (rc) {
+		cifs_dbg(VFS, "%s: could not update sha512 shash\n", __func__);
+		return rc;
+	}
+
+	for (i = 0; i < nvec; i++) {
+		rc = crypto_shash_update(&d->shash,
+					 iov[i].iov_base, iov[i].iov_len);
+		if (rc) {
+			cifs_dbg(VFS, "%s: could not update sha512 shash\n",
+				 __func__);
+			return rc;
+		}
+	}
+
+	rc = crypto_shash_final(&d->shash, ses->preauth_sha_hash);
+	if (rc) {
+		cifs_dbg(VFS, "%s: could not finalize sha512 shash\n",
+			 __func__);
+		return rc;
+	}
+
+	return 0;
+}
+#endif
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index eb68e2fcc500..968b1d43a1ea 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1412,7 +1412,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
 	} while (rc == -EAGAIN);
 
 	if (rc) {
-		if (rc != -ENOENT)
+		if ((rc != -ENOENT) && (rc != -EOPNOTSUPP))
 			cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
 		goto out;
 	}
@@ -1457,6 +1457,8 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 	unsigned int sub_offset;
 	unsigned int print_len;
 	unsigned int print_offset;
+	struct cifs_ses *ses = tcon->ses;
+	struct TCP_Server_Info *server = ses->server;
 
 	cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
 
@@ -1479,7 +1481,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
-	    get_rfc1002_length(err_buf) + 4 < SMB2_SYMLINK_STRUCT_SIZE) {
+	    get_rfc1002_length(err_buf) + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) {
 		kfree(utf16_path);
 		return -ENOENT;
 	}
@@ -1492,13 +1494,13 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 	print_len = le16_to_cpu(symlink->PrintNameLength);
 	print_offset = le16_to_cpu(symlink->PrintNameOffset);
 
-	if (get_rfc1002_length(err_buf) + 4 <
+	if (get_rfc1002_length(err_buf) + server->vals->header_preamble_size <
 			SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
 		kfree(utf16_path);
 		return -ENOENT;
 	}
 
-	if (get_rfc1002_length(err_buf) + 4 <
+	if (get_rfc1002_length(err_buf) + server->vals->header_preamble_size <
 			SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
 		kfree(utf16_path);
 		return -ENOENT;
@@ -2050,7 +2052,8 @@ smb2_dir_needs_close(struct cifsFileInfo *cfile)
 }
 
 static void
-fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
+fill_transform_hdr(struct TCP_Server_Info *server,
+		   struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
 {
 	struct smb2_sync_hdr *shdr =
 			(struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
@@ -2062,10 +2065,19 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
 	tr_hdr->Flags = cpu_to_le16(0x01);
 	get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
 	memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
-	inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
+	inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - server->vals->header_preamble_size);
 	inc_rfc1001_len(tr_hdr, orig_len);
 }
 
+/* We can not use the normal sg_set_buf() as we will sometimes pass a
+ * stack object as buf.
+ */
+static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
+				   unsigned int buflen)
+{
+	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
 static struct scatterlist *
 init_sg(struct smb_rqst *rqst, u8 *sign)
 {
@@ -2080,16 +2092,16 @@ init_sg(struct smb_rqst *rqst, u8 *sign)
 		return NULL;
 
 	sg_init_table(sg, sg_len);
-	sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
+	smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
 	for (i = 1; i < rqst->rq_nvec; i++)
-		sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
+		smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
 						rqst->rq_iov[i].iov_len);
 	for (j = 0; i < sg_len - 1; i++, j++) {
 		unsigned int len = (j < rqst->rq_npages - 1) ? rqst->rq_pagesz
 							: rqst->rq_tailsz;
 		sg_set_page(&sg[i], rqst->rq_pages[j], len, 0);
 	}
-	sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE);
+	smb2_sg_set_buf(&sg[sg_len - 1], sign, SMB2_SIGNATURE_SIZE);
 	return sg;
 }
 
@@ -2125,7 +2137,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
 {
 	struct smb2_transform_hdr *tr_hdr =
 			(struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20 - server->vals->header_preamble_size;
 	int rc = 0;
 	struct scatterlist *sg;
 	u8 sign[SMB2_SIGNATURE_SIZE] = {};
@@ -2253,7 +2265,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
 		goto err_free_iov;
 
 	/* fill the 1st iov with a transform header */
-	fill_transform_hdr(tr_hdr, old_rq);
+	fill_transform_hdr(server, tr_hdr, old_rq);
 	new_rq->rq_iov[0].iov_base = tr_hdr;
 	new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
 
@@ -2335,10 +2347,10 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
 	if (rc)
 		return rc;
 
-	memmove(buf + 4, iov[1].iov_base, buf_data_size);
+	memmove(buf + server->vals->header_preamble_size, iov[1].iov_base, buf_data_size);
 	hdr = (struct smb2_hdr *)buf;
 	hdr->smb2_buf_length = cpu_to_be32(buf_data_size + page_data_size);
-	server->total_read = buf_data_size + page_data_size + 4;
+	server->total_read = buf_data_size + page_data_size + server->vals->header_preamble_size;
 
 	return rc;
 }
@@ -2442,7 +2454,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 		return 0;
 	}
 
-	data_offset = server->ops->read_data_offset(buf) + 4;
+	data_offset = server->ops->read_data_offset(buf) + server->vals->header_preamble_size;
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	use_rdma_mr = rdata->mr;
 #endif
@@ -2538,11 +2550,12 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	unsigned int npages;
 	struct page **pages;
 	unsigned int len;
-	unsigned int buflen = get_rfc1002_length(buf) + 4;
+	unsigned int buflen = get_rfc1002_length(buf) + server->vals->header_preamble_size;
 	int rc;
 	int i = 0;
 
-	len = min_t(unsigned int, buflen, server->vals->read_rsp_size - 4 +
+	len = min_t(unsigned int, buflen, server->vals->read_rsp_size -
+		server->vals->header_preamble_size +
 		sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
 
 	rc = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, len);
@@ -2550,8 +2563,9 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 		return rc;
 	server->total_read += rc;
 
-	len = le32_to_cpu(tr_hdr->OriginalMessageSize) + 4 -
-						server->vals->read_rsp_size;
+	len = le32_to_cpu(tr_hdr->OriginalMessageSize) +
+		server->vals->header_preamble_size -
+		server->vals->read_rsp_size;
 	npages = DIV_ROUND_UP(len, PAGE_SIZE);
 
 	pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
@@ -2577,7 +2591,8 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
-	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size - 4,
+	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size -
+			      server->vals->header_preamble_size,
 			      pages, npages, len);
 	if (rc)
 		goto free_pages;
@@ -2614,7 +2629,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
 	struct mid_q_entry *mid_entry;
 
 	/* switch to large buffer if too big for a small one */
-	if (pdu_length + 4 > MAX_CIFS_SMALL_BUFFER_SIZE) {
+	if (pdu_length + server->vals->header_preamble_size > MAX_CIFS_SMALL_BUFFER_SIZE) {
 		server->large_buf = true;
 		memcpy(server->bigbuf, buf, server->total_read);
 		buf = server->bigbuf;
@@ -2622,12 +2637,13 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
 
 	/* now read the rest */
 	length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
-				pdu_length - HEADER_SIZE(server) + 1 + 4);
+				pdu_length - HEADER_SIZE(server) + 1 +
+				server->vals->header_preamble_size);
 	if (length < 0)
 		return length;
 	server->total_read += length;
 
-	buf_size = pdu_length + 4 - sizeof(struct smb2_transform_hdr);
+	buf_size = pdu_length + server->vals->header_preamble_size - sizeof(struct smb2_transform_hdr);
 	length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
 	if (length)
 		return length;
@@ -2656,7 +2672,7 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
 	unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
 
-	if (pdu_length + 4 < sizeof(struct smb2_transform_hdr) +
+	if (pdu_length + server->vals->header_preamble_size < sizeof(struct smb2_transform_hdr) +
 						sizeof(struct smb2_sync_hdr)) {
 		cifs_dbg(VFS, "Transform message is too small (%u)\n",
 			 pdu_length);
@@ -2665,14 +2681,14 @@ smb3_receive_transform(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 		return -ECONNABORTED;
 	}
 
-	if (pdu_length + 4 < orig_len + sizeof(struct smb2_transform_hdr)) {
+	if (pdu_length + server->vals->header_preamble_size < orig_len + sizeof(struct smb2_transform_hdr)) {
 		cifs_dbg(VFS, "Transform message is broken\n");
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
 		return -ECONNABORTED;
 	}
 
-	if (pdu_length + 4 > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+	if (pdu_length + server->vals->header_preamble_size > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
 		return receive_encrypted_read(server, mid);
 
 	return receive_encrypted_standard(server, mid);
@@ -2683,7 +2699,8 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
 	char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
 
-	return handle_read_data(server, mid, buf, get_rfc1002_length(buf) + 4,
+	return handle_read_data(server, mid, buf, get_rfc1002_length(buf) +
+				server->vals->header_preamble_size,
 				NULL, 0, 0);
 }
 
@@ -3088,6 +3105,7 @@ struct smb_version_values smb20_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3108,6 +3126,7 @@ struct smb_version_values smb21_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3128,6 +3147,7 @@ struct smb_version_values smb3any_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3148,6 +3168,7 @@ struct smb_version_values smbdefault_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3168,6 +3189,7 @@ struct smb_version_values smb30_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3188,6 +3210,7 @@ struct smb_version_values smb302_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3209,6 +3232,7 @@ struct smb_version_values smb311_values = {
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
 	.header_size = sizeof(struct smb2_hdr),
+	.header_preamble_size = 4,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 63778ac22fd9..f7741cee2a4c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -453,6 +453,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 		return rc;
 
 	req->sync_hdr.SessionId = 0;
+#ifdef CONFIG_CIFS_SMB311
+	memset(server->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
+	memset(ses->preauth_sha_hash, 0, SMB2_PREAUTH_HASH_SIZE);
+#endif
 
 	if (strcmp(ses->server->vals->version_string,
 		   SMB3ANY_VERSION_STRING) == 0) {
@@ -564,6 +568,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 
 	/* BB: add check that dialect was valid given dialect(s) we asked for */
 
+#ifdef CONFIG_CIFS_SMB311
+	/*
+	 * Keep a copy of the hash after negprot. This hash will be
+	 * the starting hash value for all sessions made from this
+	 * server.
+	 */
+	memcpy(server->preauth_sha_hash, ses->preauth_sha_hash,
+	       SMB2_PREAUTH_HASH_SIZE);
+#endif
 	/* SMB2 only has an extended negflavor */
 	server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
 	/* set it to the maximum buffer size value we can send with 1 credit */
@@ -571,8 +584,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 			       SMB2_MAX_BUFFER_SIZE);
 	server->max_read = le32_to_cpu(rsp->MaxReadSize);
 	server->max_write = le32_to_cpu(rsp->MaxWriteSize);
-	/* BB Do we need to validate the SecurityMode? */
 	server->sec_mode = le16_to_cpu(rsp->SecurityMode);
+	if ((server->sec_mode & SMB2_SEC_MODE_FLAGS_ALL) != server->sec_mode)
+		cifs_dbg(FYI, "Server returned unexpected security mode 0x%x\n",
+				server->sec_mode);
 	server->capabilities = le32_to_cpu(rsp->Capabilities);
 	/* Internal types */
 	server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
@@ -621,6 +636,10 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
 		return 0;
 #endif
 
+	/* In SMB3.11 preauth integrity supersedes validate negotiate */
+	if (tcon->ses->server->dialect == SMB311_PROT_ID)
+		return 0;
+
 	/*
 	 * validation ioctl must be signed, so no point sending this if we
 	 * can not sign it (ie are not known user).  Even if signing is not
@@ -1148,6 +1167,14 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	sess_data->buf0_type = CIFS_NO_BUFFER;
 	sess_data->nls_cp = (struct nls_table *) nls_cp;
 
+#ifdef CONFIG_CIFS_SMB311
+	/*
+	 * Initialize the session hash with the server one.
+	 */
+	memcpy(ses->preauth_sha_hash, ses->server->preauth_sha_hash,
+	       SMB2_PREAUTH_HASH_SIZE);
+#endif
+
 	while (sess_data->func)
 		sess_data->func(sess_data);
 
@@ -1280,6 +1307,11 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	iov[1].iov_base = unc_path;
 	iov[1].iov_len = unc_path_len;
 
+	/* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */
+	if ((ses->server->dialect == SMB311_PROT_ID) &&
+	    !encryption_required(tcon))
+		req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
+
 	rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
 	cifs_small_buf_release(req);
 	rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
@@ -1441,7 +1473,7 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
 	unsigned int remaining;
 	char *name;
 
-	data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
+	data_offset = (char *)rsp + server->vals->header_preamble_size + le32_to_cpu(rsp->CreateContextsOffset);
 	remaining = le32_to_cpu(rsp->CreateContextsLength);
 	cc = (struct create_context *)data_offset;
 	while (remaining >= sizeof(struct create_context)) {
@@ -1738,8 +1770,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		rc = alloc_path_with_tree_prefix(&copy_path, &copy_size,
 						 &name_len,
 						 tcon->treeName, path);
-		if (rc)
+		if (rc) {
+			cifs_small_buf_release(req);
 			return rc;
+		}
 		req->NameLength = cpu_to_le16(name_len * 2);
 		uni_path_len = copy_size;
 		path = copy_path;
@@ -1750,8 +1784,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		if (uni_path_len % 8 != 0) {
 			copy_size = roundup(uni_path_len, 8);
 			copy_path = kzalloc(copy_size, GFP_KERNEL);
-			if (!copy_path)
+			if (!copy_path) {
+				cifs_small_buf_release(req);
 				return -ENOMEM;
+			}
 			memcpy((char *)copy_path, (const char *)path,
 			       uni_path_len);
 			uni_path_len = copy_size;
@@ -3418,6 +3454,7 @@ static int
 build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
 		   int outbuf_len, u64 persistent_fid, u64 volatile_fid)
 {
+	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc;
 	struct smb2_query_info_req *req;
 	unsigned int total_len;
@@ -3440,7 +3477,7 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon, int level,
 	req->InputBufferOffset =
 			cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
 	req->OutputBufferLength = cpu_to_le32(
-		outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - 4);
+		outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - server->vals->header_preamble_size);
 
 	iov->iov_base = (char *)req;
 	iov->iov_len = total_len;
@@ -3457,6 +3494,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc = 0;
 	int resp_buftype;
 	struct cifs_ses *ses = tcon->ses;
+	struct TCP_Server_Info *server = ses->server;
 	struct smb2_fs_full_size_info *info = NULL;
 	int flags = 0;
 
@@ -3477,7 +3515,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 	rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
 
-	info = (struct smb2_fs_full_size_info *)(4 /* RFC1001 len */ +
+	info = (struct smb2_fs_full_size_info *)(server->vals->header_preamble_size +
 		le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
 	rc = validate_buf(le16_to_cpu(rsp->OutputBufferOffset),
 			  le32_to_cpu(rsp->OutputBufferLength), &rsp->hdr,
@@ -3500,6 +3538,7 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 	int rc = 0;
 	int resp_buftype, max_len, min_len;
 	struct cifs_ses *ses = tcon->ses;
+	struct TCP_Server_Info *server = ses->server;
 	unsigned int rsp_len, offset;
 	int flags = 0;
 
@@ -3540,15 +3579,15 @@ SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
 		goto qfsattr_exit;
 
 	if (level == FS_ATTRIBUTE_INFORMATION)
-		memcpy(&tcon->fsAttrInfo, 4 /* RFC1001 len */ + offset
+		memcpy(&tcon->fsAttrInfo, server->vals->header_preamble_size + offset
 			+ (char *)&rsp->hdr, min_t(unsigned int,
 			rsp_len, max_len));
 	else if (level == FS_DEVICE_INFORMATION)
-		memcpy(&tcon->fsDevInfo, 4 /* RFC1001 len */ + offset
+		memcpy(&tcon->fsDevInfo, server->vals->header_preamble_size + offset
 			+ (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
 	else if (level == FS_SECTOR_SIZE_INFORMATION) {
 		struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
-			(4 /* RFC1001 len */ + offset + (char *)&rsp->hdr);
+			(server->vals->header_preamble_size + offset + (char *)&rsp->hdr);
 		tcon->ss_flags = le32_to_cpu(ss_info->Flags);
 		tcon->perf_sector_size =
 			le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 2a2b34ccaf49..253e2c7c952f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -249,6 +249,8 @@ struct smb2_negotiate_req {
 /* SecurityMode flags */
 #define	SMB2_NEGOTIATE_SIGNING_ENABLED	0x0001
 #define SMB2_NEGOTIATE_SIGNING_REQUIRED	0x0002
+#define SMB2_SEC_MODE_FLAGS_ALL		0x0003
+
 /* Capabilities flags */
 #define SMB2_GLOBAL_CAP_DFS		0x00000001
 #define SMB2_GLOBAL_CAP_LEASING		0x00000002 /* Resp only New to SMB2.1 */
@@ -264,6 +266,7 @@ struct smb2_negotiate_req {
 #define SMB311_SALT_SIZE			32
 /* Hash Algorithm Types */
 #define SMB2_PREAUTH_INTEGRITY_SHA512	cpu_to_le16(0x0001)
+#define SMB2_PREAUTH_HASH_SIZE 64
 
 struct smb2_preauth_neg_context {
 	__le16	ContextType; /* 1 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 05287b01f596..cbcce3f7e86f 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -202,4 +202,9 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
 
 extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
 					enum securityEnum);
+#ifdef CONFIG_CIFS_SMB311
+extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
+extern int smb311_update_preauth_hash(struct cifs_ses *ses,
+				      struct kvec *iov, int nvec);
+#endif
 #endif			/* _SMB2PROTO_H */
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 99493946e2f9..bf49cb73b9e6 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -43,77 +43,62 @@
 static int
 smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
-	int rc;
-	unsigned int size;
+	return cifs_alloc_hash("hmac(sha256)",
+			       &server->secmech.hmacsha256,
+			       &server->secmech.sdeschmacsha256);
+}
 
-	if (server->secmech.sdeschmacsha256 != NULL)
-		return 0; /* already allocated */
+static int
+smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+	struct cifs_secmech *p = &server->secmech;
+	int rc;
 
-	server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
-	if (IS_ERR(server->secmech.hmacsha256)) {
-		cifs_dbg(VFS, "could not allocate crypto hmacsha256\n");
-		rc = PTR_ERR(server->secmech.hmacsha256);
-		server->secmech.hmacsha256 = NULL;
-		return rc;
-	}
+	rc = cifs_alloc_hash("hmac(sha256)",
+			     &p->hmacsha256,
+			     &p->sdeschmacsha256);
+	if (rc)
+		goto err;
 
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->secmech.hmacsha256);
-	server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL);
-	if (!server->secmech.sdeschmacsha256) {
-		crypto_free_shash(server->secmech.hmacsha256);
-		server->secmech.hmacsha256 = NULL;
-		return -ENOMEM;
-	}
-	server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256;
-	server->secmech.sdeschmacsha256->shash.flags = 0x0;
+	rc = cifs_alloc_hash("cmac(aes)", &p->cmacaes, &p->sdesccmacaes);
+	if (rc)
+		goto err;
 
 	return 0;
+err:
+	cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
+	return rc;
 }
 
-static int
-smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
+#ifdef CONFIG_CIFS_SMB311
+int
+smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
-	unsigned int size;
-	int rc;
-
-	if (server->secmech.sdesccmacaes != NULL)
-		return 0;  /* already allocated */
+	struct cifs_secmech *p = &server->secmech;
+	int rc = 0;
 
-	rc = smb2_crypto_shash_allocate(server);
+	rc = cifs_alloc_hash("hmac(sha256)",
+			     &p->hmacsha256,
+			     &p->sdeschmacsha256);
 	if (rc)
 		return rc;
 
-	server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0);
-	if (IS_ERR(server->secmech.cmacaes)) {
-		cifs_dbg(VFS, "could not allocate crypto cmac-aes");
-		kfree(server->secmech.sdeschmacsha256);
-		server->secmech.sdeschmacsha256 = NULL;
-		crypto_free_shash(server->secmech.hmacsha256);
-		server->secmech.hmacsha256 = NULL;
-		rc = PTR_ERR(server->secmech.cmacaes);
-		server->secmech.cmacaes = NULL;
-		return rc;
-	}
+	rc = cifs_alloc_hash("cmac(aes)", &p->cmacaes, &p->sdesccmacaes);
+	if (rc)
+		goto err;
 
-	size = sizeof(struct shash_desc) +
-			crypto_shash_descsize(server->secmech.cmacaes);
-	server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL);
-	if (!server->secmech.sdesccmacaes) {
-		cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__);
-		kfree(server->secmech.sdeschmacsha256);
-		server->secmech.sdeschmacsha256 = NULL;
-		crypto_free_shash(server->secmech.hmacsha256);
-		crypto_free_shash(server->secmech.cmacaes);
-		server->secmech.hmacsha256 = NULL;
-		server->secmech.cmacaes = NULL;
-		return -ENOMEM;
-	}
-	server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes;
-	server->secmech.sdesccmacaes->shash.flags = 0x0;
+	rc = cifs_alloc_hash("sha512", &p->sha512, &p->sdescsha512);
+	if (rc)
+		goto err;
 
 	return 0;
+
+err:
+	cifs_free_hash(&p->cmacaes, &p->sdesccmacaes);
+	cifs_free_hash(&p->hmacsha256, &p->sdeschmacsha256);
+	return rc;
 }
+#endif
 
 static struct cifs_ses *
 smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
@@ -457,7 +442,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 		cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
 		return rc;
 	}
-	
+
 	rc = __cifs_calc_signature(rqst, server, sigptr,
 				   &server->secmech.sdesccmacaes->shash);
 
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 91710eb571fb..5008af546dd1 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -862,6 +862,8 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
 	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
 		request->sge[0].length, DMA_TO_DEVICE);
 
+	smbd_disconnect_rdma_connection(info);
+
 dma_mapping_failed:
 	mempool_free(request, info->request_mempool);
 	return rc;
@@ -1025,7 +1027,7 @@ static int smbd_post_send(struct smbd_connection *info,
 
 	for (i = 0; i < request->num_sge; i++) {
 		log_rdma_send(INFO,
-			"rdma_request sge[%d] addr=%llu legnth=%u\n",
+			"rdma_request sge[%d] addr=%llu length=%u\n",
 			i, request->sge[0].addr, request->sge[0].length);
 		ib_dma_sync_single_for_device(
 			info->id->device,
@@ -1061,6 +1063,7 @@ static int smbd_post_send(struct smbd_connection *info,
 			if (atomic_dec_and_test(&info->send_pending))
 				wake_up(&info->wait_send_pending);
 		}
+		smbd_disconnect_rdma_connection(info);
 	} else
 		/* Reset timer for idle connection after packet is sent */
 		mod_delayed_work(info->workqueue, &info->idle_timer_work,
@@ -1202,7 +1205,7 @@ static int smbd_post_recv(
 	if (rc) {
 		ib_dma_unmap_single(info->id->device, response->sge.addr,
 				    response->sge.length, DMA_FROM_DEVICE);
-
+		smbd_disconnect_rdma_connection(info);
 		log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
 	}
 
@@ -1498,8 +1501,8 @@ int smbd_reconnect(struct TCP_Server_Info *server)
 	log_rdma_event(INFO, "reconnecting rdma session\n");
 
 	if (!server->smbd_conn) {
-		log_rdma_event(ERR, "rdma session already destroyed\n");
-		return -EINVAL;
+		log_rdma_event(INFO, "rdma session already destroyed\n");
+		goto create_conn;
 	}
 
 	/*
@@ -1512,15 +1515,19 @@ int smbd_reconnect(struct TCP_Server_Info *server)
 	}
 
 	/* wait until the transport is destroyed */
-	wait_event(server->smbd_conn->wait_destroy,
-		server->smbd_conn->transport_status == SMBD_DESTROYED);
+	if (!wait_event_timeout(server->smbd_conn->wait_destroy,
+		server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ))
+		return -EAGAIN;
 
 	destroy_workqueue(server->smbd_conn->workqueue);
 	kfree(server->smbd_conn);
 
+create_conn:
 	log_rdma_event(INFO, "creating rdma session\n");
 	server->smbd_conn = smbd_get_connection(
 		server, (struct sockaddr *) &server->dstaddr);
+	log_rdma_event(INFO, "created rdma session info=%p\n",
+		server->smbd_conn);
 
 	return server->smbd_conn ? 0 : -ENOENT;
 }
@@ -2295,7 +2302,7 @@ static void smbd_mr_recovery_work(struct work_struct *work)
 				rc = ib_dereg_mr(smbdirect_mr->mr);
 				if (rc) {
 					log_rdma_mr(ERR,
-						"ib_dereg_mr faield rc=%x\n",
+						"ib_dereg_mr failed rc=%x\n",
 						rc);
 					smbd_disconnect_rdma_connection(info);
 				}
@@ -2542,6 +2549,8 @@ dma_map_error:
 	if (atomic_dec_and_test(&info->mr_used_count))
 		wake_up(&info->wait_for_mr_cleanup);
 
+	smbd_disconnect_rdma_connection(info);
+
 	return NULL;
 }
 
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index c12bffefa3c9..a0b80ac651a6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -121,25 +121,12 @@ int
 mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
 {
 	int rc;
-	unsigned int size;
-	struct crypto_shash *md4;
-	struct sdesc *sdescmd4;
-
-	md4 = crypto_alloc_shash("md4", 0, 0);
-	if (IS_ERR(md4)) {
-		rc = PTR_ERR(md4);
-		cifs_dbg(VFS, "%s: Crypto md4 allocation error %d\n",
-			 __func__, rc);
-		return rc;
-	}
-	size = sizeof(struct shash_desc) + crypto_shash_descsize(md4);
-	sdescmd4 = kmalloc(size, GFP_KERNEL);
-	if (!sdescmd4) {
-		rc = -ENOMEM;
+	struct crypto_shash *md4 = NULL;
+	struct sdesc *sdescmd4 = NULL;
+
+	rc = cifs_alloc_hash("md4", &md4, &sdescmd4);
+	if (rc)
 		goto mdfour_err;
-	}
-	sdescmd4->shash.tfm = md4;
-	sdescmd4->shash.flags = 0x0;
 
 	rc = crypto_shash_init(&sdescmd4->shash);
 	if (rc) {
@@ -156,9 +143,7 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
 		cifs_dbg(VFS, "%s: Could not generate md4 hash\n", __func__);
 
 mdfour_err:
-	crypto_free_shash(md4);
-	kfree(sdescmd4);
-
+	cifs_free_hash(&md4, &sdescmd4);
 	return rc;
 }
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 9779b3292d8e..279718dcb2ed 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -37,6 +37,7 @@
 #include "cifsglob.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "smb2proto.h"
 #include "smbdirect.h"
 
 /* Max number of iovectors we can use off the stack when sending requests. */
@@ -751,6 +752,12 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 	if (rc < 0)
 		goto out;
 
+#ifdef CONFIG_CIFS_SMB311
+	if (ses->status == CifsNew)
+		smb311_update_preauth_hash(ses, rqst->rq_iov+1,
+					   rqst->rq_nvec-1);
+#endif
+
 	if (timeout == CIFS_ASYNC_OP)
 		goto out;
 
@@ -783,12 +790,23 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 
 	buf = (char *)midQ->resp_buf;
 	resp_iov->iov_base = buf;
-	resp_iov->iov_len = get_rfc1002_length(buf) + 4;
+	resp_iov->iov_len = get_rfc1002_length(buf) +
+		ses->server->vals->header_preamble_size;
 	if (midQ->large_buf)
 		*resp_buf_type = CIFS_LARGE_BUFFER;
 	else
 		*resp_buf_type = CIFS_SMALL_BUFFER;
 
+#ifdef CONFIG_CIFS_SMB311
+	if (ses->status == CifsNew) {
+		struct kvec iov = {
+			.iov_base = buf + 4,
+			.iov_len = get_rfc1002_length(buf)
+		};
+		smb311_update_preauth_hash(ses, &iov, 1);
+	}
+#endif
+
 	credits = ses->server->ops->get_credits(midQ);
 
 	rc = ses->server->ops->check_receive(midQ, ses->server,
diff --git a/fs/d_path.c b/fs/d_path.c
new file mode 100644
index 000000000000..e8fce6b1174f
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
+#include "mount.h"
+
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
+{
+	*buflen -= namelen;
+	if (*buflen < 0)
+		return -ENAMETOOLONG;
+	*buffer -= namelen;
+	memcpy(*buffer, str, namelen);
+	return 0;
+}
+
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * @buffer: buffer pointer
+ * @buflen: allocated length of the buffer
+ * @name:   name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * The length cannot be trusted, we need to copy it byte-by-byte until
+ * the length is reached or a null byte is found. It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Load acquire is needed to make sure that we see that terminating NUL.
+ */
+static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
+{
+	const char *dname = smp_load_acquire(&name->name); /* ^^^ */
+	u32 dlen = READ_ONCE(name->len);
+	char *p;
+
+	*buflen -= dlen + 1;
+	if (*buflen < 0)
+		return -ENAMETOOLONG;
+	p = *buffer -= dlen + 1;
+	*p++ = '/';
+	while (dlen--) {
+		char c = *dname++;
+		if (!c)
+			break;
+		*p++ = c;
+	}
+	return 0;
+}
+
+/**
+ * prepend_path - Prepend path string to a buffer
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buffer: pointer to the end of the buffer
+ * @buflen: pointer to buffer length
+ *
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
+ */
+static int prepend_path(const struct path *path,
+			const struct path *root,
+			char **buffer, int *buflen)
+{
+	struct dentry *dentry;
+	struct vfsmount *vfsmnt;
+	struct mount *mnt;
+	int error = 0;
+	unsigned seq, m_seq = 0;
+	char *bptr;
+	int blen;
+
+	rcu_read_lock();
+restart_mnt:
+	read_seqbegin_or_lock(&mount_lock, &m_seq);
+	seq = 0;
+	rcu_read_lock();
+restart:
+	bptr = *buffer;
+	blen = *buflen;
+	error = 0;
+	dentry = path->dentry;
+	vfsmnt = path->mnt;
+	mnt = real_mount(vfsmnt);
+	read_seqbegin_or_lock(&rename_lock, &seq);
+	while (dentry != root->dentry || vfsmnt != root->mnt) {
+		struct dentry * parent;
+
+		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+			struct mount *parent = READ_ONCE(mnt->mnt_parent);
+			/* Escaped? */
+			if (dentry != vfsmnt->mnt_root) {
+				bptr = *buffer;
+				blen = *buflen;
+				error = 3;
+				break;
+			}
+			/* Global root? */
+			if (mnt != parent) {
+				dentry = READ_ONCE(mnt->mnt_mountpoint);
+				mnt = parent;
+				vfsmnt = &mnt->mnt;
+				continue;
+			}
+			if (!error)
+				error = is_mounted(vfsmnt) ? 1 : 2;
+			break;
+		}
+		parent = dentry->d_parent;
+		prefetch(parent);
+		error = prepend_name(&bptr, &blen, &dentry->d_name);
+		if (error)
+			break;
+
+		dentry = parent;
+	}
+	if (!(seq & 1))
+		rcu_read_unlock();
+	if (need_seqretry(&rename_lock, seq)) {
+		seq = 1;
+		goto restart;
+	}
+	done_seqretry(&rename_lock, seq);
+
+	if (!(m_seq & 1))
+		rcu_read_unlock();
+	if (need_seqretry(&mount_lock, m_seq)) {
+		m_seq = 1;
+		goto restart_mnt;
+	}
+	done_seqretry(&mount_lock, m_seq);
+
+	if (error >= 0 && bptr == *buffer) {
+		if (--blen < 0)
+			error = -ENAMETOOLONG;
+		else
+			*--bptr = '/';
+	}
+	*buffer = bptr;
+	*buflen = blen;
+	return error;
+}
+
+/**
+ * __d_path - return the path of a dentry
+ * @path: the dentry/vfsmount to report
+ * @root: root vfsmnt/dentry
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name.
+ *
+ * Returns a pointer into the buffer or an error code if the
+ * path was too long.
+ *
+ * "buflen" should be positive.
+ *
+ * If the path is not reachable from the supplied root, return %NULL.
+ */
+char *__d_path(const struct path *path,
+	       const struct path *root,
+	       char *buf, int buflen)
+{
+	char *res = buf + buflen;
+	int error;
+
+	prepend(&res, &buflen, "\0", 1);
+	error = prepend_path(path, root, &res, &buflen);
+
+	if (error < 0)
+		return ERR_PTR(error);
+	if (error > 0)
+		return NULL;
+	return res;
+}
+
+char *d_absolute_path(const struct path *path,
+	       char *buf, int buflen)
+{
+	struct path root = {};
+	char *res = buf + buflen;
+	int error;
+
+	prepend(&res, &buflen, "\0", 1);
+	error = prepend_path(path, &root, &res, &buflen);
+
+	if (error > 1)
+		error = -EINVAL;
+	if (error < 0)
+		return ERR_PTR(error);
+	return res;
+}
+
+/*
+ * same as __d_path but appends "(deleted)" for unlinked files.
+ */
+static int path_with_deleted(const struct path *path,
+			     const struct path *root,
+			     char **buf, int *buflen)
+{
+	prepend(buf, buflen, "\0", 1);
+	if (d_unlinked(path->dentry)) {
+		int error = prepend(buf, buflen, " (deleted)", 10);
+		if (error)
+			return error;
+	}
+
+	return prepend_path(path, root, buf, buflen);
+}
+
+static int prepend_unreachable(char **buffer, int *buflen)
+{
+	return prepend(buffer, buflen, "(unreachable)", 13);
+}
+
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&fs->seq);
+		*root = fs->root;
+	} while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/**
+ * d_path - return the path of a dentry
+ * @path: path to report
+ * @buf: buffer to return value in
+ * @buflen: buffer length
+ *
+ * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * the string " (deleted)" is appended. Note that this is ambiguous.
+ *
+ * Returns a pointer into the buffer or an error code if the path was
+ * too long. Note: Callers should use the returned pointer, not the passed
+ * in buffer, to use the name! The implementation often starts at an offset
+ * into the buffer, and may leave 0 bytes at the start.
+ *
+ * "buflen" should be positive.
+ */
+char *d_path(const struct path *path, char *buf, int buflen)
+{
+	char *res = buf + buflen;
+	struct path root;
+	int error;
+
+	/*
+	 * We have various synthetic filesystems that never get mounted.  On
+	 * these filesystems dentries are never used for lookup purposes, and
+	 * thus don't need to be hashed.  They also don't need a name until a
+	 * user wants to identify the object in /proc/pid/fd/.  The little hack
+	 * below allows us to generate a name for these objects on demand:
+	 *
+	 * Some pseudo inodes are mountable.  When they are mounted
+	 * path->dentry == path->mnt->mnt_root.  In that case don't call d_dname
+	 * and instead have d_path return the mounted path.
+	 */
+	if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+	    (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
+		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+
+	rcu_read_lock();
+	get_fs_root_rcu(current->fs, &root);
+	error = path_with_deleted(path, &root, &res, &buflen);
+	rcu_read_unlock();
+
+	if (error < 0)
+		res = ERR_PTR(error);
+	return res;
+}
+EXPORT_SYMBOL(d_path);
+
+/*
+ * Helper function for dentry_operations.d_dname() members
+ */
+char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
+			const char *fmt, ...)
+{
+	va_list args;
+	char temp[64];
+	int sz;
+
+	va_start(args, fmt);
+	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
+	va_end(args);
+
+	if (sz > sizeof(temp) || sz > buflen)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	buffer += buflen - sz;
+	return memcpy(buffer, temp, sz);
+}
+
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	char *end = buffer + buflen;
+	/* these dentries are never renamed, so d_lock is not needed */
+	if (prepend(&end, &buflen, " (deleted)", 11) ||
+	    prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
+	    prepend(&end, &buflen, "/", 1))  
+		end = ERR_PTR(-ENAMETOOLONG);
+	return end;
+}
+EXPORT_SYMBOL(simple_dname);
+
+/*
+ * Write full pathname from the root of the filesystem into the buffer.
+ */
+static char *__dentry_path(struct dentry *d, char *buf, int buflen)
+{
+	struct dentry *dentry;
+	char *end, *retval;
+	int len, seq = 0;
+	int error = 0;
+
+	if (buflen < 2)
+		goto Elong;
+
+	rcu_read_lock();
+restart:
+	dentry = d;
+	end = buf + buflen;
+	len = buflen;
+	prepend(&end, &len, "\0", 1);
+	/* Get '/' right */
+	retval = end-1;
+	*retval = '/';
+	read_seqbegin_or_lock(&rename_lock, &seq);
+	while (!IS_ROOT(dentry)) {
+		struct dentry *parent = dentry->d_parent;
+
+		prefetch(parent);
+		error = prepend_name(&end, &len, &dentry->d_name);
+		if (error)
+			break;
+
+		retval = end;
+		dentry = parent;
+	}
+	if (!(seq & 1))
+		rcu_read_unlock();
+	if (need_seqretry(&rename_lock, seq)) {
+		seq = 1;
+		goto restart;
+	}
+	done_seqretry(&rename_lock, seq);
+	if (error)
+		goto Elong;
+	return retval;
+Elong:
+	return ERR_PTR(-ENAMETOOLONG);
+}
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+	return __dentry_path(dentry, buf, buflen);
+}
+EXPORT_SYMBOL(dentry_path_raw);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+	char *p = NULL;
+	char *retval;
+
+	if (d_unlinked(dentry)) {
+		p = buf + buflen;
+		if (prepend(&p, &buflen, "//deleted", 10) != 0)
+			goto Elong;
+		buflen++;
+	}
+	retval = __dentry_path(dentry, buf, buflen);
+	if (!IS_ERR(retval) && p)
+		*p = '/';	/* restore '/' overriden with '\0' */
+	return retval;
+Elong:
+	return ERR_PTR(-ENAMETOOLONG);
+}
+
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+				    struct path *pwd)
+{
+	unsigned seq;
+
+	do {
+		seq = read_seqcount_begin(&fs->seq);
+		*root = fs->root;
+		*pwd = fs->pwd;
+	} while (read_seqcount_retry(&fs->seq, seq));
+}
+
+/*
+ * NOTE! The user-level library version returns a
+ * character pointer. The kernel system call just
+ * returns the length of the buffer filled (which
+ * includes the ending '\0' character), or a negative
+ * error value. So libc would do something like
+ *
+ *	char *getcwd(char * buf, size_t size)
+ *	{
+ *		int retval;
+ *
+ *		retval = sys_getcwd(buf, size);
+ *		if (retval >= 0)
+ *			return buf;
+ *		errno = -retval;
+ *		return NULL;
+ *	}
+ */
+SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
+{
+	int error;
+	struct path pwd, root;
+	char *page = __getname();
+
+	if (!page)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
+
+	error = -ENOENT;
+	if (!d_unlinked(pwd.dentry)) {
+		unsigned long len;
+		char *cwd = page + PATH_MAX;
+		int buflen = PATH_MAX;
+
+		prepend(&cwd, &buflen, "\0", 1);
+		error = prepend_path(&pwd, &root, &cwd, &buflen);
+		rcu_read_unlock();
+
+		if (error < 0)
+			goto out;
+
+		/* Unreachable from current root */
+		if (error > 0) {
+			error = prepend_unreachable(&cwd, &buflen);
+			if (error)
+				goto out;
+		}
+
+		error = -ERANGE;
+		len = PATH_MAX + page - cwd;
+		if (len <= size) {
+			error = len;
+			if (copy_to_user(buf, cwd, len))
+				error = -EFAULT;
+		}
+	} else {
+		rcu_read_unlock();
+	}
+
+out:
+	__putname(page);
+	return error;
+}
diff --git a/fs/dcache.c b/fs/dcache.c
index 8945e6cabd93..593079176123 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,7 @@
  * the dcache entry is deleted or garbage collected.
  */
 
-#include <linux/syscalls.h>
+#include <linux/ratelimit.h>
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
@@ -24,18 +24,11 @@
 #include <linux/hash.h>
 #include <linux/cache.h>
 #include <linux/export.h>
-#include <linux/mount.h>
-#include <linux/file.h>
-#include <linux/uaccess.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
-#include <linux/swap.h>
 #include <linux/bootmem.h>
-#include <linux/fs_struct.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
-#include <linux/prefetch.h>
-#include <linux/ratelimit.h>
 #include <linux/list_lru.h>
 #include "internal.h"
 #include "mount.h"
@@ -74,9 +67,7 @@
  *       dentry->d_lock
  *
  * If no ancestor relationship:
- * if (dentry1 < dentry2)
- *   dentry1->d_lock
- *     dentry2->d_lock
+ * arbitrary, since it's serialized on rename_lock
  */
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -440,17 +431,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
 	list_lru_isolate_move(lru, &dentry->d_lru, list);
 }
 
-/*
- * dentry_lru_(add|del)_list) must be called with d_lock held.
- */
-static void dentry_lru_add(struct dentry *dentry)
-{
-	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
-		d_lru_add(dentry);
-	else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
-		dentry->d_flags |= DCACHE_REFERENCED;
-}
-
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -470,30 +450,29 @@ static void dentry_lru_add(struct dentry *dentry)
  */
 static void ___d_drop(struct dentry *dentry)
 {
-	if (!d_unhashed(dentry)) {
-		struct hlist_bl_head *b;
-		/*
-		 * Hashed dentries are normally on the dentry hashtable,
-		 * with the exception of those newly allocated by
-		 * d_obtain_root, which are always IS_ROOT:
-		 */
-		if (unlikely(IS_ROOT(dentry)))
-			b = &dentry->d_sb->s_roots;
-		else
-			b = d_hash(dentry->d_name.hash);
+	struct hlist_bl_head *b;
+	/*
+	 * Hashed dentries are normally on the dentry hashtable,
+	 * with the exception of those newly allocated by
+	 * d_obtain_root, which are always IS_ROOT:
+	 */
+	if (unlikely(IS_ROOT(dentry)))
+		b = &dentry->d_sb->s_roots;
+	else
+		b = d_hash(dentry->d_name.hash);
 
-		hlist_bl_lock(b);
-		__hlist_bl_del(&dentry->d_hash);
-		hlist_bl_unlock(b);
-		/* After this call, in-progress rcu-walk path lookup will fail. */
-		write_seqcount_invalidate(&dentry->d_seq);
-	}
+	hlist_bl_lock(b);
+	__hlist_bl_del(&dentry->d_hash);
+	hlist_bl_unlock(b);
 }
 
 void __d_drop(struct dentry *dentry)
 {
-	___d_drop(dentry);
-	dentry->d_hash.pprev = NULL;
+	if (!d_unhashed(dentry)) {
+		___d_drop(dentry);
+		dentry->d_hash.pprev = NULL;
+		write_seqcount_invalidate(&dentry->d_seq);
+	}
 }
 EXPORT_SYMBOL(__d_drop);
 
@@ -589,10 +568,71 @@ static void __dentry_kill(struct dentry *dentry)
 		dentry_free(dentry);
 }
 
+static struct dentry *__lock_parent(struct dentry *dentry)
+{
+	struct dentry *parent;
+	rcu_read_lock();
+	spin_unlock(&dentry->d_lock);
+again:
+	parent = READ_ONCE(dentry->d_parent);
+	spin_lock(&parent->d_lock);
+	/*
+	 * We can't blindly lock dentry until we are sure
+	 * that we won't violate the locking order.
+	 * Any changes of dentry->d_parent must have
+	 * been done with parent->d_lock held, so
+	 * spin_lock() above is enough of a barrier
+	 * for checking if it's still our child.
+	 */
+	if (unlikely(parent != dentry->d_parent)) {
+		spin_unlock(&parent->d_lock);
+		goto again;
+	}
+	rcu_read_unlock();
+	if (parent != dentry)
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	else
+		parent = NULL;
+	return parent;
+}
+
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *parent = dentry->d_parent;
+	if (IS_ROOT(dentry))
+		return NULL;
+	if (likely(spin_trylock(&parent->d_lock)))
+		return parent;
+	return __lock_parent(dentry);
+}
+
+static inline bool retain_dentry(struct dentry *dentry)
+{
+	WARN_ON(d_in_lookup(dentry));
+
+	/* Unreachable? Get rid of it */
+	if (unlikely(d_unhashed(dentry)))
+		return false;
+
+	if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+		return false;
+
+	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
+		if (dentry->d_op->d_delete(dentry))
+			return false;
+	}
+	/* retain; LRU fodder */
+	dentry->d_lockref.count--;
+	if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+		d_lru_add(dentry);
+	else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+		dentry->d_flags |= DCACHE_REFERENCED;
+	return true;
+}
+
 /*
  * Finish off a dentry we've decided to kill.
  * dentry->d_lock must be held, returns with it unlocked.
- * If ref is non-zero, then decrement the refcount too.
  * Returns dentry requiring refcount drop, or NULL if we're done.
  */
 static struct dentry *dentry_kill(struct dentry *dentry)
@@ -602,62 +642,43 @@ static struct dentry *dentry_kill(struct dentry *dentry)
 	struct dentry *parent = NULL;
 
 	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
-		goto failed;
+		goto slow_positive;
 
 	if (!IS_ROOT(dentry)) {
 		parent = dentry->d_parent;
 		if (unlikely(!spin_trylock(&parent->d_lock))) {
-			if (inode)
-				spin_unlock(&inode->i_lock);
-			goto failed;
+			parent = __lock_parent(dentry);
+			if (likely(inode || !dentry->d_inode))
+				goto got_locks;
+			/* negative that became positive */
+			if (parent)
+				spin_unlock(&parent->d_lock);
+			inode = dentry->d_inode;
+			goto slow_positive;
 		}
 	}
-
 	__dentry_kill(dentry);
 	return parent;
 
-failed:
+slow_positive:
 	spin_unlock(&dentry->d_lock);
-	return dentry; /* try again with same dentry */
-}
-
-static inline struct dentry *lock_parent(struct dentry *dentry)
-{
-	struct dentry *parent = dentry->d_parent;
-	if (IS_ROOT(dentry))
-		return NULL;
-	if (unlikely(dentry->d_lockref.count < 0))
-		return NULL;
-	if (likely(spin_trylock(&parent->d_lock)))
+	spin_lock(&inode->i_lock);
+	spin_lock(&dentry->d_lock);
+	parent = lock_parent(dentry);
+got_locks:
+	if (unlikely(dentry->d_lockref.count != 1)) {
+		dentry->d_lockref.count--;
+	} else if (likely(!retain_dentry(dentry))) {
+		__dentry_kill(dentry);
 		return parent;
-	rcu_read_lock();
-	spin_unlock(&dentry->d_lock);
-again:
-	parent = READ_ONCE(dentry->d_parent);
-	spin_lock(&parent->d_lock);
-	/*
-	 * We can't blindly lock dentry until we are sure
-	 * that we won't violate the locking order.
-	 * Any changes of dentry->d_parent must have
-	 * been done with parent->d_lock held, so
-	 * spin_lock() above is enough of a barrier
-	 * for checking if it's still our child.
-	 */
-	if (unlikely(parent != dentry->d_parent)) {
-		spin_unlock(&parent->d_lock);
-		goto again;
 	}
-	if (parent != dentry) {
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (unlikely(dentry->d_lockref.count < 0)) {
-			spin_unlock(&parent->d_lock);
-			parent = NULL;
-		}
-	} else {
-		parent = NULL;
-	}
-	rcu_read_unlock();
-	return parent;
+	/* we are keeping it, after all */
+	if (inode)
+		spin_unlock(&inode->i_lock);
+	if (parent)
+		spin_unlock(&parent->d_lock);
+	spin_unlock(&dentry->d_lock);
+	return NULL;
 }
 
 /*
@@ -807,27 +828,11 @@ repeat:
 	/* Slow case: now with the dentry lock held */
 	rcu_read_unlock();
 
-	WARN_ON(d_in_lookup(dentry));
-
-	/* Unreachable? Get rid of it */
-	if (unlikely(d_unhashed(dentry)))
-		goto kill_it;
-
-	if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-		goto kill_it;
-
-	if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
-		if (dentry->d_op->d_delete(dentry))
-			goto kill_it;
+	if (likely(retain_dentry(dentry))) {
+		spin_unlock(&dentry->d_lock);
+		return;
 	}
 
-	dentry_lru_add(dentry);
-
-	dentry->d_lockref.count--;
-	spin_unlock(&dentry->d_lock);
-	return;
-
-kill_it:
 	dentry = dentry_kill(dentry);
 	if (dentry) {
 		cond_resched();
@@ -976,56 +981,83 @@ restart:
 }
 EXPORT_SYMBOL(d_prune_aliases);
 
-static void shrink_dentry_list(struct list_head *list)
+/*
+ * Lock a dentry from shrink list.
+ * Called under rcu_read_lock() and dentry->d_lock; the former
+ * guarantees that nothing we access will be freed under us.
+ * Note that dentry is *not* protected from concurrent dentry_kill(),
+ * d_delete(), etc.
+ *
+ * Return false if dentry has been disrupted or grabbed, leaving
+ * the caller to kick it off-list.  Otherwise, return true and have
+ * that dentry's inode and parent both locked.
+ */
+static bool shrink_lock_dentry(struct dentry *dentry)
 {
-	struct dentry *dentry, *parent;
+	struct inode *inode;
+	struct dentry *parent;
 
-	while (!list_empty(list)) {
-		struct inode *inode;
-		dentry = list_entry(list->prev, struct dentry, d_lru);
+	if (dentry->d_lockref.count)
+		return false;
+
+	inode = dentry->d_inode;
+	if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
+		spin_unlock(&dentry->d_lock);
+		spin_lock(&inode->i_lock);
 		spin_lock(&dentry->d_lock);
-		parent = lock_parent(dentry);
+		if (unlikely(dentry->d_lockref.count))
+			goto out;
+		/* changed inode means that somebody had grabbed it */
+		if (unlikely(inode != dentry->d_inode))
+			goto out;
+	}
 
-		/*
-		 * The dispose list is isolated and dentries are not accounted
-		 * to the LRU here, so we can simply remove it from the list
-		 * here regardless of whether it is referenced or not.
-		 */
-		d_shrink_del(dentry);
+	parent = dentry->d_parent;
+	if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
+		return true;
 
-		/*
-		 * We found an inuse dentry which was not removed from
-		 * the LRU because of laziness during lookup. Do not free it.
-		 */
-		if (dentry->d_lockref.count > 0) {
-			spin_unlock(&dentry->d_lock);
-			if (parent)
-				spin_unlock(&parent->d_lock);
-			continue;
-		}
+	spin_unlock(&dentry->d_lock);
+	spin_lock(&parent->d_lock);
+	if (unlikely(parent != dentry->d_parent)) {
+		spin_unlock(&parent->d_lock);
+		spin_lock(&dentry->d_lock);
+		goto out;
+	}
+	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+	if (likely(!dentry->d_lockref.count))
+		return true;
+	spin_unlock(&parent->d_lock);
+out:
+	if (inode)
+		spin_unlock(&inode->i_lock);
+	return false;
+}
 
+static void shrink_dentry_list(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct dentry *dentry, *parent;
 
-		if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
-			bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+		dentry = list_entry(list->prev, struct dentry, d_lru);
+		spin_lock(&dentry->d_lock);
+		rcu_read_lock();
+		if (!shrink_lock_dentry(dentry)) {
+			bool can_free = false;
+			rcu_read_unlock();
+			d_shrink_del(dentry);
+			if (dentry->d_lockref.count < 0)
+				can_free = dentry->d_flags & DCACHE_MAY_FREE;
 			spin_unlock(&dentry->d_lock);
-			if (parent)
-				spin_unlock(&parent->d_lock);
 			if (can_free)
 				dentry_free(dentry);
 			continue;
 		}
-
-		inode = dentry->d_inode;
-		if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
-			d_shrink_add(dentry, list);
-			spin_unlock(&dentry->d_lock);
-			if (parent)
-				spin_unlock(&parent->d_lock);
-			continue;
-		}
-
+		rcu_read_unlock();
+		d_shrink_del(dentry);
+		parent = dentry->d_parent;
 		__dentry_kill(dentry);
-
+		if (parent == dentry)
+			continue;
 		/*
 		 * We need to prune ancestors too. This is necessary to prevent
 		 * quadratic behavior of shrink_dcache_parent(), but is also
@@ -1033,26 +1065,8 @@ static void shrink_dentry_list(struct list_head *list)
 		 * fragmentation.
 		 */
 		dentry = parent;
-		while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
-			parent = lock_parent(dentry);
-			if (dentry->d_lockref.count != 1) {
-				dentry->d_lockref.count--;
-				spin_unlock(&dentry->d_lock);
-				if (parent)
-					spin_unlock(&parent->d_lock);
-				break;
-			}
-			inode = dentry->d_inode;	/* can't be NULL */
-			if (unlikely(!spin_trylock(&inode->i_lock))) {
-				spin_unlock(&dentry->d_lock);
-				if (parent)
-					spin_unlock(&parent->d_lock);
-				cpu_relax();
-				continue;
-			}
-			__dentry_kill(dentry);
-			dentry = parent;
-		}
+		while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
+			dentry = dentry_kill(dentry);
 	}
 }
 
@@ -2379,32 +2393,22 @@ EXPORT_SYMBOL(d_hash_and_lookup);
  
 void d_delete(struct dentry * dentry)
 {
-	struct inode *inode;
-	int isdir = 0;
+	struct inode *inode = dentry->d_inode;
+	int isdir = d_is_dir(dentry);
+
+	spin_lock(&inode->i_lock);
+	spin_lock(&dentry->d_lock);
 	/*
 	 * Are we the only user?
 	 */
-again:
-	spin_lock(&dentry->d_lock);
-	inode = dentry->d_inode;
-	isdir = S_ISDIR(inode->i_mode);
 	if (dentry->d_lockref.count == 1) {
-		if (!spin_trylock(&inode->i_lock)) {
-			spin_unlock(&dentry->d_lock);
-			cpu_relax();
-			goto again;
-		}
 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
 		dentry_unlink_inode(dentry);
-		fsnotify_nameremove(dentry, isdir);
-		return;
-	}
-
-	if (!d_unhashed(dentry))
+	} else {
 		__d_drop(dentry);
-
-	spin_unlock(&dentry->d_lock);
-
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&inode->i_lock);
+	}
 	fsnotify_nameremove(dentry, isdir);
 }
 EXPORT_SYMBOL(d_delete);
@@ -2769,57 +2773,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 		kfree_rcu(old_name, u.head);
 }
 
-static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
-{
-	/*
-	 * XXXX: do we really need to take target->d_lock?
-	 */
-	if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
-		spin_lock(&target->d_parent->d_lock);
-	else {
-		if (d_ancestor(dentry->d_parent, target->d_parent)) {
-			spin_lock(&dentry->d_parent->d_lock);
-			spin_lock_nested(&target->d_parent->d_lock,
-						DENTRY_D_LOCK_NESTED);
-		} else {
-			spin_lock(&target->d_parent->d_lock);
-			spin_lock_nested(&dentry->d_parent->d_lock,
-						DENTRY_D_LOCK_NESTED);
-		}
-	}
-	if (target < dentry) {
-		spin_lock_nested(&target->d_lock, 2);
-		spin_lock_nested(&dentry->d_lock, 3);
-	} else {
-		spin_lock_nested(&dentry->d_lock, 2);
-		spin_lock_nested(&target->d_lock, 3);
-	}
-}
-
-static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
-{
-	if (target->d_parent != dentry->d_parent)
-		spin_unlock(&dentry->d_parent->d_lock);
-	if (target->d_parent != target)
-		spin_unlock(&target->d_parent->d_lock);
-	spin_unlock(&target->d_lock);
-	spin_unlock(&dentry->d_lock);
-}
-
-/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch, unless we are going to rehash
- * it.  Note that if we *do* unhash the target, we are not allowed
- * to rehash it without giving it a new name/hash key - whether
- * we swap or overwrite the names here, resulting name won't match
- * the reality in filesystem; it's only there for d_path() purposes.
- * Note that all of this is happening under rename_lock, so the
- * any hash lookup seeing it in the middle of manipulations will
- * be discarded anyway.  So we do not care what happens to the hash
- * key in that case.
- */
 /*
  * __d_move - move a dentry
  * @dentry: entry to move
@@ -2834,15 +2787,34 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
 static void __d_move(struct dentry *dentry, struct dentry *target,
 		     bool exchange)
 {
+	struct dentry *old_parent, *p;
 	struct inode *dir = NULL;
 	unsigned n;
-	if (!dentry->d_inode)
-		printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 
-	BUG_ON(d_ancestor(dentry, target));
+	WARN_ON(!dentry->d_inode);
+	if (WARN_ON(dentry == target))
+		return;
+
 	BUG_ON(d_ancestor(target, dentry));
+	old_parent = dentry->d_parent;
+	p = d_ancestor(old_parent, target);
+	if (IS_ROOT(dentry)) {
+		BUG_ON(p);
+		spin_lock(&target->d_parent->d_lock);
+	} else if (!p) {
+		/* target is not a descendent of dentry->d_parent */
+		spin_lock(&target->d_parent->d_lock);
+		spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
+	} else {
+		BUG_ON(p == dentry);
+		spin_lock(&old_parent->d_lock);
+		if (p != target)
+			spin_lock_nested(&target->d_parent->d_lock,
+					DENTRY_D_LOCK_NESTED);
+	}
+	spin_lock_nested(&dentry->d_lock, 2);
+	spin_lock_nested(&target->d_lock, 3);
 
-	dentry_lock_for_move(dentry, target);
 	if (unlikely(d_in_lookup(target))) {
 		dir = target->d_parent->d_inode;
 		n = start_dir_add(dir);
@@ -2853,47 +2825,44 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
 
 	/* unhash both */
-	/* ___d_drop does write_seqcount_barrier, but they're OK to nest. */
-	___d_drop(dentry);
-	___d_drop(target);
+	if (!d_unhashed(dentry))
+		___d_drop(dentry);
+	if (!d_unhashed(target))
+		___d_drop(target);
 
-	/* Switch the names.. */
-	if (exchange)
-		swap_names(dentry, target);
-	else
+	/* ... and switch them in the tree */
+	dentry->d_parent = target->d_parent;
+	if (!exchange) {
 		copy_name(dentry, target);
-
-	/* rehash in new place(s) */
-	__d_rehash(dentry);
-	if (exchange)
-		__d_rehash(target);
-	else
 		target->d_hash.pprev = NULL;
-
-	/* ... and switch them in the tree */
-	if (IS_ROOT(dentry)) {
-		/* splicing a tree */
-		dentry->d_flags |= DCACHE_RCUACCESS;
-		dentry->d_parent = target->d_parent;
-		target->d_parent = target;
-		list_del_init(&target->d_child);
-		list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+		dentry->d_parent->d_lockref.count++;
+		if (dentry == old_parent)
+			dentry->d_flags |= DCACHE_RCUACCESS;
+		else
+			WARN_ON(!--old_parent->d_lockref.count);
 	} else {
-		/* swapping two dentries */
-		swap(dentry->d_parent, target->d_parent);
+		target->d_parent = old_parent;
+		swap_names(dentry, target);
 		list_move(&target->d_child, &target->d_parent->d_subdirs);
-		list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
-		if (exchange)
-			fsnotify_update_flags(target);
-		fsnotify_update_flags(dentry);
+		__d_rehash(target);
+		fsnotify_update_flags(target);
 	}
+	list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
+	__d_rehash(dentry);
+	fsnotify_update_flags(dentry);
 
 	write_seqcount_end(&target->d_seq);
 	write_seqcount_end(&dentry->d_seq);
 
 	if (dir)
 		end_dir_add(dir, n);
-	dentry_unlock_for_move(dentry, target);
+
+	if (dentry->d_parent != old_parent)
+		spin_unlock(&dentry->d_parent->d_lock);
+	if (dentry != old_parent)
+		spin_unlock(&old_parent->d_lock);
+	spin_unlock(&target->d_lock);
+	spin_unlock(&dentry->d_lock);
 }
 
 /*
@@ -3041,12 +3010,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 					inode->i_sb->s_type->name,
 					inode->i_sb->s_id);
 			} else if (!IS_ROOT(new)) {
+				struct dentry *old_parent = dget(new->d_parent);
 				int err = __d_unalias(inode, dentry, new);
 				write_sequnlock(&rename_lock);
 				if (err) {
 					dput(new);
 					new = ERR_PTR(err);
 				}
+				dput(old_parent);
 			} else {
 				__d_move(new, dentry, false);
 				write_sequnlock(&rename_lock);
@@ -3061,467 +3032,6 @@ out:
 }
 EXPORT_SYMBOL(d_splice_alias);
 
-static int prepend(char **buffer, int *buflen, const char *str, int namelen)
-{
-	*buflen -= namelen;
-	if (*buflen < 0)
-		return -ENAMETOOLONG;
-	*buffer -= namelen;
-	memcpy(*buffer, str, namelen);
-	return 0;
-}
-
-/**
- * prepend_name - prepend a pathname in front of current buffer pointer
- * @buffer: buffer pointer
- * @buflen: allocated length of the buffer
- * @name:   name string and length qstr structure
- *
- * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
- * make sure that either the old or the new name pointer and length are
- * fetched. However, there may be mismatch between length and pointer.
- * The length cannot be trusted, we need to copy it byte-by-byte until
- * the length is reached or a null byte is found. It also prepends "/" at
- * the beginning of the name. The sequence number check at the caller will
- * retry it again when a d_move() does happen. So any garbage in the buffer
- * due to mismatched pointer and length will be discarded.
- *
- * Load acquire is needed to make sure that we see that terminating NUL.
- */
-static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
-{
-	const char *dname = smp_load_acquire(&name->name); /* ^^^ */
-	u32 dlen = READ_ONCE(name->len);
-	char *p;
-
-	*buflen -= dlen + 1;
-	if (*buflen < 0)
-		return -ENAMETOOLONG;
-	p = *buffer -= dlen + 1;
-	*p++ = '/';
-	while (dlen--) {
-		char c = *dname++;
-		if (!c)
-			break;
-		*p++ = c;
-	}
-	return 0;
-}
-
-/**
- * prepend_path - Prepend path string to a buffer
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buffer: pointer to the end of the buffer
- * @buflen: pointer to buffer length
- *
- * The function will first try to write out the pathname without taking any
- * lock other than the RCU read lock to make sure that dentries won't go away.
- * It only checks the sequence number of the global rename_lock as any change
- * in the dentry's d_seq will be preceded by changes in the rename_lock
- * sequence number. If the sequence number had been changed, it will restart
- * the whole pathname back-tracing sequence again by taking the rename_lock.
- * In this case, there is no need to take the RCU read lock as the recursive
- * parent pointer references will keep the dentry chain alive as long as no
- * rename operation is performed.
- */
-static int prepend_path(const struct path *path,
-			const struct path *root,
-			char **buffer, int *buflen)
-{
-	struct dentry *dentry;
-	struct vfsmount *vfsmnt;
-	struct mount *mnt;
-	int error = 0;
-	unsigned seq, m_seq = 0;
-	char *bptr;
-	int blen;
-
-	rcu_read_lock();
-restart_mnt:
-	read_seqbegin_or_lock(&mount_lock, &m_seq);
-	seq = 0;
-	rcu_read_lock();
-restart:
-	bptr = *buffer;
-	blen = *buflen;
-	error = 0;
-	dentry = path->dentry;
-	vfsmnt = path->mnt;
-	mnt = real_mount(vfsmnt);
-	read_seqbegin_or_lock(&rename_lock, &seq);
-	while (dentry != root->dentry || vfsmnt != root->mnt) {
-		struct dentry * parent;
-
-		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			struct mount *parent = READ_ONCE(mnt->mnt_parent);
-			/* Escaped? */
-			if (dentry != vfsmnt->mnt_root) {
-				bptr = *buffer;
-				blen = *buflen;
-				error = 3;
-				break;
-			}
-			/* Global root? */
-			if (mnt != parent) {
-				dentry = READ_ONCE(mnt->mnt_mountpoint);
-				mnt = parent;
-				vfsmnt = &mnt->mnt;
-				continue;
-			}
-			if (!error)
-				error = is_mounted(vfsmnt) ? 1 : 2;
-			break;
-		}
-		parent = dentry->d_parent;
-		prefetch(parent);
-		error = prepend_name(&bptr, &blen, &dentry->d_name);
-		if (error)
-			break;
-
-		dentry = parent;
-	}
-	if (!(seq & 1))
-		rcu_read_unlock();
-	if (need_seqretry(&rename_lock, seq)) {
-		seq = 1;
-		goto restart;
-	}
-	done_seqretry(&rename_lock, seq);
-
-	if (!(m_seq & 1))
-		rcu_read_unlock();
-	if (need_seqretry(&mount_lock, m_seq)) {
-		m_seq = 1;
-		goto restart_mnt;
-	}
-	done_seqretry(&mount_lock, m_seq);
-
-	if (error >= 0 && bptr == *buffer) {
-		if (--blen < 0)
-			error = -ENAMETOOLONG;
-		else
-			*--bptr = '/';
-	}
-	*buffer = bptr;
-	*buflen = blen;
-	return error;
-}
-
-/**
- * __d_path - return the path of a dentry
- * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name.
- *
- * Returns a pointer into the buffer or an error code if the
- * path was too long.
- *
- * "buflen" should be positive.
- *
- * If the path is not reachable from the supplied root, return %NULL.
- */
-char *__d_path(const struct path *path,
-	       const struct path *root,
-	       char *buf, int buflen)
-{
-	char *res = buf + buflen;
-	int error;
-
-	prepend(&res, &buflen, "\0", 1);
-	error = prepend_path(path, root, &res, &buflen);
-
-	if (error < 0)
-		return ERR_PTR(error);
-	if (error > 0)
-		return NULL;
-	return res;
-}
-
-char *d_absolute_path(const struct path *path,
-	       char *buf, int buflen)
-{
-	struct path root = {};
-	char *res = buf + buflen;
-	int error;
-
-	prepend(&res, &buflen, "\0", 1);
-	error = prepend_path(path, &root, &res, &buflen);
-
-	if (error > 1)
-		error = -EINVAL;
-	if (error < 0)
-		return ERR_PTR(error);
-	return res;
-}
-
-/*
- * same as __d_path but appends "(deleted)" for unlinked files.
- */
-static int path_with_deleted(const struct path *path,
-			     const struct path *root,
-			     char **buf, int *buflen)
-{
-	prepend(buf, buflen, "\0", 1);
-	if (d_unlinked(path->dentry)) {
-		int error = prepend(buf, buflen, " (deleted)", 10);
-		if (error)
-			return error;
-	}
-
-	return prepend_path(path, root, buf, buflen);
-}
-
-static int prepend_unreachable(char **buffer, int *buflen)
-{
-	return prepend(buffer, buflen, "(unreachable)", 13);
-}
-
-static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
-{
-	unsigned seq;
-
-	do {
-		seq = read_seqcount_begin(&fs->seq);
-		*root = fs->root;
-	} while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/**
- * d_path - return the path of a dentry
- * @path: path to report
- * @buf: buffer to return value in
- * @buflen: buffer length
- *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
- * the string " (deleted)" is appended. Note that this is ambiguous.
- *
- * Returns a pointer into the buffer or an error code if the path was
- * too long. Note: Callers should use the returned pointer, not the passed
- * in buffer, to use the name! The implementation often starts at an offset
- * into the buffer, and may leave 0 bytes at the start.
- *
- * "buflen" should be positive.
- */
-char *d_path(const struct path *path, char *buf, int buflen)
-{
-	char *res = buf + buflen;
-	struct path root;
-	int error;
-
-	/*
-	 * We have various synthetic filesystems that never get mounted.  On
-	 * these filesystems dentries are never used for lookup purposes, and
-	 * thus don't need to be hashed.  They also don't need a name until a
-	 * user wants to identify the object in /proc/pid/fd/.  The little hack
-	 * below allows us to generate a name for these objects on demand:
-	 *
-	 * Some pseudo inodes are mountable.  When they are mounted
-	 * path->dentry == path->mnt->mnt_root.  In that case don't call d_dname
-	 * and instead have d_path return the mounted path.
-	 */
-	if (path->dentry->d_op && path->dentry->d_op->d_dname &&
-	    (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
-		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
-
-	rcu_read_lock();
-	get_fs_root_rcu(current->fs, &root);
-	error = path_with_deleted(path, &root, &res, &buflen);
-	rcu_read_unlock();
-
-	if (error < 0)
-		res = ERR_PTR(error);
-	return res;
-}
-EXPORT_SYMBOL(d_path);
-
-/*
- * Helper function for dentry_operations.d_dname() members
- */
-char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
-			const char *fmt, ...)
-{
-	va_list args;
-	char temp[64];
-	int sz;
-
-	va_start(args, fmt);
-	sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
-	va_end(args);
-
-	if (sz > sizeof(temp) || sz > buflen)
-		return ERR_PTR(-ENAMETOOLONG);
-
-	buffer += buflen - sz;
-	return memcpy(buffer, temp, sz);
-}
-
-char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
-{
-	char *end = buffer + buflen;
-	/* these dentries are never renamed, so d_lock is not needed */
-	if (prepend(&end, &buflen, " (deleted)", 11) ||
-	    prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
-	    prepend(&end, &buflen, "/", 1))  
-		end = ERR_PTR(-ENAMETOOLONG);
-	return end;
-}
-EXPORT_SYMBOL(simple_dname);
-
-/*
- * Write full pathname from the root of the filesystem into the buffer.
- */
-static char *__dentry_path(struct dentry *d, char *buf, int buflen)
-{
-	struct dentry *dentry;
-	char *end, *retval;
-	int len, seq = 0;
-	int error = 0;
-
-	if (buflen < 2)
-		goto Elong;
-
-	rcu_read_lock();
-restart:
-	dentry = d;
-	end = buf + buflen;
-	len = buflen;
-	prepend(&end, &len, "\0", 1);
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-	read_seqbegin_or_lock(&rename_lock, &seq);
-	while (!IS_ROOT(dentry)) {
-		struct dentry *parent = dentry->d_parent;
-
-		prefetch(parent);
-		error = prepend_name(&end, &len, &dentry->d_name);
-		if (error)
-			break;
-
-		retval = end;
-		dentry = parent;
-	}
-	if (!(seq & 1))
-		rcu_read_unlock();
-	if (need_seqretry(&rename_lock, seq)) {
-		seq = 1;
-		goto restart;
-	}
-	done_seqretry(&rename_lock, seq);
-	if (error)
-		goto Elong;
-	return retval;
-Elong:
-	return ERR_PTR(-ENAMETOOLONG);
-}
-
-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
-{
-	return __dentry_path(dentry, buf, buflen);
-}
-EXPORT_SYMBOL(dentry_path_raw);
-
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
-{
-	char *p = NULL;
-	char *retval;
-
-	if (d_unlinked(dentry)) {
-		p = buf + buflen;
-		if (prepend(&p, &buflen, "//deleted", 10) != 0)
-			goto Elong;
-		buflen++;
-	}
-	retval = __dentry_path(dentry, buf, buflen);
-	if (!IS_ERR(retval) && p)
-		*p = '/';	/* restore '/' overriden with '\0' */
-	return retval;
-Elong:
-	return ERR_PTR(-ENAMETOOLONG);
-}
-
-static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
-				    struct path *pwd)
-{
-	unsigned seq;
-
-	do {
-		seq = read_seqcount_begin(&fs->seq);
-		*root = fs->root;
-		*pwd = fs->pwd;
-	} while (read_seqcount_retry(&fs->seq, seq));
-}
-
-/*
- * NOTE! The user-level library version returns a
- * character pointer. The kernel system call just
- * returns the length of the buffer filled (which
- * includes the ending '\0' character), or a negative
- * error value. So libc would do something like
- *
- *	char *getcwd(char * buf, size_t size)
- *	{
- *		int retval;
- *
- *		retval = sys_getcwd(buf, size);
- *		if (retval >= 0)
- *			return buf;
- *		errno = -retval;
- *		return NULL;
- *	}
- */
-SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
-{
-	int error;
-	struct path pwd, root;
-	char *page = __getname();
-
-	if (!page)
-		return -ENOMEM;
-
-	rcu_read_lock();
-	get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
-
-	error = -ENOENT;
-	if (!d_unlinked(pwd.dentry)) {
-		unsigned long len;
-		char *cwd = page + PATH_MAX;
-		int buflen = PATH_MAX;
-
-		prepend(&cwd, &buflen, "\0", 1);
-		error = prepend_path(&pwd, &root, &cwd, &buflen);
-		rcu_read_unlock();
-
-		if (error < 0)
-			goto out;
-
-		/* Unreachable from current root */
-		if (error > 0) {
-			error = prepend_unreachable(&cwd, &buflen);
-			if (error)
-				goto out;
-		}
-
-		error = -ERANGE;
-		len = PATH_MAX + page - cwd;
-		if (len <= size) {
-			error = len;
-			if (copy_to_user(buf, cwd, len))
-				error = -EFAULT;
-		}
-	} else {
-		rcu_read_unlock();
-	}
-
-out:
-	__putname(page);
-	return error;
-}
-
 /*
  * Test whether new_dentry is a subdirectory of old_dentry.
  *
@@ -3585,6 +3095,8 @@ void d_genocide(struct dentry *parent)
 	d_walk(parent, parent, d_genocide_kill, NULL);
 }
 
+EXPORT_SYMBOL(d_genocide);
+
 void d_tmpfile(struct dentry *dentry, struct inode *inode)
 {
 	inode_dec_link_count(inode);
@@ -3664,8 +3176,6 @@ static void __init dcache_init(void)
 struct kmem_cache *names_cachep __read_mostly;
 EXPORT_SYMBOL(names_cachep);
 
-EXPORT_SYMBOL(d_genocide);
-
 void __init vfs_caches_init_early(void)
 {
 	int i;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 63a998c3f252..13b01351dd1c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
 	if (!parent)
 		parent = debugfs_mount->mnt_root;
 
-	inode_lock(d_inode(parent));
-	dentry = lookup_one_len(name, parent, strlen(name));
-	inode_unlock(d_inode(parent));
-
+	dentry = lookup_one_len_unlocked(name, parent, strlen(name));
 	if (IS_ERR(dentry))
 		return NULL;
 	if (!d_really_is_positive(dentry)) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9b3e0a83526..a33d8fb1bf2a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -243,8 +243,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	 */
 	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
 			     sb->s_blocksize * 8, bh->b_data);
-	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
-	ext4_group_desc_csum_set(sb, block_group, gdp);
 	return 0;
 }
 
@@ -340,20 +338,25 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	/* check whether block bitmap block number is set */
 	blk = ext4_block_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
+	if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+	    !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;
 
 	/* check whether the inode bitmap block number is set */
 	blk = ext4_inode_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
+	if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+	    !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;
 
 	/* check whether the inode table block number is set */
 	blk = ext4_inode_table(sb, desc);
 	offset = blk - group_first_block;
+	if (offset < 0 || EXT4_B2C(sbi, offset) >= sb->s_blocksize ||
+	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= sb->s_blocksize)
+		return blk;
 	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
 			EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
 			EXT4_B2C(sbi, offset));
@@ -419,6 +422,7 @@ struct buffer_head *
 ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 {
 	struct ext4_group_desc *desc;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct buffer_head *bh;
 	ext4_fsblk_t bitmap_blk;
 	int err;
@@ -427,6 +431,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 	if (!desc)
 		return ERR_PTR(-EFSCORRUPTED);
 	bitmap_blk = ext4_block_bitmap(sb, desc);
+	if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
+		ext4_error(sb, "Invalid block bitmap block %llu in "
+			   "block_group %u", bitmap_blk, block_group);
+		return ERR_PTR(-EFSCORRUPTED);
+	}
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
 		ext4_error(sb, "Cannot get buffer for block bitmap - "
@@ -448,6 +458,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
+		set_buffer_verified(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
 		if (err) {
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index da87cf757f7d..e2902d394f1b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -365,13 +365,15 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int dx_dir = is_dx_dir(inode);
-	loff_t htree_max = ext4_get_htree_eof(file);
+	loff_t ret, htree_max = ext4_get_htree_eof(file);
 
 	if (likely(dx_dir))
-		return generic_file_llseek_size(file, offset, whence,
+		ret = generic_file_llseek_size(file, offset, whence,
 						    htree_max, htree_max);
 	else
-		return ext4_llseek(file, offset, whence);
+		ret = ext4_llseek(file, offset, whence);
+	file->f_version = inode_peek_iversion(inode) - 1;
+	return ret;
 }
 
 /*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3241475a1733..a42e71203e53 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1522,8 +1522,6 @@ enum {
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
 	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
-	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
-					   nolocking */
 	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
 	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
 	EXT4_STATE_LUSTRE_EA_INODE,	/* Lustre-style ea_inode */
@@ -3181,21 +3179,6 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }
 
-/*
- * Disable DIO read nolock optimization, so new dioreaders will be forced
- * to grab i_mutex
- */
-static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
-{
-	ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
-	smp_mb();
-}
-static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
-{
-	smp_mb();
-	ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
-}
-
 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
 
 /* For ioend & aio unwritten conversion wait queues */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 2d593201cf7a..7c70b08d104c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -166,13 +166,6 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 	might_sleep();
 
 	if (ext4_handle_valid(handle)) {
-		struct super_block *sb;
-
-		sb = handle->h_transaction->t_journal->j_private;
-		if (unlikely(ext4_forced_shutdown(EXT4_SB(sb)))) {
-			jbd2_journal_abort_handle(handle);
-			return -EIO;
-		}
 		err = jbd2_journal_get_write_access(handle, bh);
 		if (err)
 			ext4_journal_abort_handle(where, line, __func__, bh,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 054416e9d827..0a7315961bac 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4796,7 +4796,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	/* Preallocate the range including the unaligned edges */
@@ -4807,7 +4806,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 				 round_down(offset, 1 << blkbits)) >> blkbits,
 				new_size, flags);
 		if (ret)
-			goto out_dio;
+			goto out_mutex;
 
 	}
 
@@ -4824,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		ret = ext4_update_disksize_before_punch(inode, offset, len);
 		if (ret) {
 			up_write(&EXT4_I(inode)->i_mmap_sem);
-			goto out_dio;
+			goto out_mutex;
 		}
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
@@ -4834,10 +4833,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 					     flags);
 		up_write(&EXT4_I(inode)->i_mmap_sem);
 		if (ret)
-			goto out_dio;
+			goto out_mutex;
 	}
 	if (!partial_begin && !partial_end)
-		goto out_dio;
+		goto out_mutex;
 
 	/*
 	 * In worst case we have to writeout two nonadjacent unwritten
@@ -4850,7 +4849,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		ext4_std_error(inode->i_sb, ret);
-		goto out_dio;
+		goto out_mutex;
 	}
 
 	inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -4875,8 +4874,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		ext4_handle_sync(handle);
 
 	ext4_journal_stop(handle);
-out_dio:
-	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	inode_unlock(inode);
 	return ret;
@@ -4964,11 +4961,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	}
 
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
-	ext4_inode_resume_unlocked_dio(inode);
 	if (ret)
 		goto out;
 
@@ -5485,7 +5480,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	}
 
 	/* Wait for existing dio to complete */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	/*
@@ -5562,7 +5556,6 @@ out_stop:
 	ext4_journal_stop(handle);
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
-	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	inode_unlock(inode);
 	return ret;
@@ -5635,7 +5628,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	}
 
 	/* Wait for existing dio to complete */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	/*
@@ -5737,7 +5729,6 @@ out_stop:
 	ext4_journal_stop(handle);
 out_mmap:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
-	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	inode_unlock(inode);
 	return ret;
@@ -5751,7 +5742,7 @@ out_mutex:
  * @lblk1:	Start block for first inode
  * @lblk2:	Start block for second inode
  * @count:	Number of blocks to swap
- * @mark_unwritten: Mark second inode's extents as unwritten after swap
+ * @unwritten: Mark second inode's extents as unwritten after swap
  * @erp:	Pointer to save error value
  *
  * This helper routine does exactly what is promise "swap extents". All other
@@ -5765,7 +5756,7 @@ out_mutex:
  */
 int
 ext4_swap_extents(handle_t *handle, struct inode *inode1,
-		     struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+		  struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
 		  ext4_lblk_t count, int unwritten, int *erp)
 {
 	struct ext4_ext_path *path1 = NULL;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 7830d28df331..df92e3ec9913 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -66,44 +66,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 }
 
-/* Initializes an uninitialized inode bitmap */
-static int ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t block_group,
-				       struct ext4_group_desc *gdp)
-{
-	struct ext4_group_info *grp;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	J_ASSERT_BH(bh, buffer_locked(bh));
-
-	/* If checksum is bad mark all blocks and inodes use to prevent
-	 * allocation, essentially implementing a per-group read-only flag. */
-	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-		return -EFSBADCRC;
-	}
-
-	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
-	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
-			bh->b_data);
-	ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
-				   EXT4_INODES_PER_GROUP(sb) / 8);
-	ext4_group_desc_csum_set(sb, block_group, gdp);
-
-	return 0;
-}
-
 void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
 {
 	if (uptodate) {
@@ -160,6 +122,7 @@ static struct buffer_head *
 ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 {
 	struct ext4_group_desc *desc;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct buffer_head *bh = NULL;
 	ext4_fsblk_t bitmap_blk;
 	int err;
@@ -169,6 +132,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return ERR_PTR(-EFSCORRUPTED);
 
 	bitmap_blk = ext4_inode_bitmap(sb, desc);
+	if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
+		ext4_error(sb, "Invalid inode bitmap blk %llu in "
+			   "block_group %u", bitmap_blk, block_group);
+		return ERR_PTR(-EFSCORRUPTED);
+	}
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
 		ext4_error(sb, "Cannot read inode bitmap - "
@@ -187,17 +156,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 
 	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-		err = ext4_init_inode_bitmap(sb, bh, block_group, desc);
+		memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
+				     sb->s_blocksize * 8, bh->b_data);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
 		set_buffer_verified(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
-		if (err) {
-			ext4_error(sb, "Failed to init inode bitmap for group "
-				   "%u: %d", block_group, err);
-			goto out;
-		}
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c94780075b04..18aa2ef963ad 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2694,15 +2694,6 @@ out:
 	return err;
 }
 
-static int __writepage(struct page *page, struct writeback_control *wbc,
-		       void *data)
-{
-	struct address_space *mapping = data;
-	int ret = ext4_writepage(page, wbc);
-	mapping_set_error(mapping, ret);
-	return ret;
-}
-
 static int ext4_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
@@ -2740,11 +2731,7 @@ static int ext4_writepages(struct address_space *mapping,
 		goto out_writepages;
 
 	if (ext4_should_journal_data(inode)) {
-		struct blk_plug plug;
-
-		blk_start_plug(&plug);
-		ret = write_cache_pages(mapping, wbc, __writepage, mapping);
-		blk_finish_plug(&plug);
+		ret = generic_writepages(mapping, wbc);
 		goto out_writepages;
 	}
 
@@ -3524,7 +3511,7 @@ retry:
 		iomap->flags |= IOMAP_F_DIRTY;
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->dax_dev = sbi->s_daxdev;
-	iomap->offset = first_block << blkbits;
+	iomap->offset = (u64)first_block << blkbits;
 	iomap->length = (u64)map.m_len << blkbits;
 
 	if (ret == 0) {
@@ -3669,7 +3656,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 	int orphan = 0;
 	handle_t *handle;
 
-	if (final_size > inode->i_size) {
+	if (final_size > inode->i_size || final_size > ei->i_disksize) {
 		/* Credits for sb + inode write */
 		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 		if (IS_ERR(handle)) {
@@ -3682,7 +3669,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 			goto out;
 		}
 		orphan = 1;
-		ei->i_disksize = inode->i_size;
+		ext4_update_i_disksize(inode, inode->i_size);
 		ext4_journal_stop(handle);
 	}
 
@@ -3789,9 +3776,10 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 			ext4_orphan_del(handle, inode);
 		if (ret > 0) {
 			loff_t end = offset + ret;
-			if (end > inode->i_size) {
-				ei->i_disksize = end;
-				i_size_write(inode, end);
+			if (end > inode->i_size || end > ei->i_disksize) {
+				ext4_update_i_disksize(inode, end);
+				if (end > inode->i_size)
+					i_size_write(inode, end);
 				/*
 				 * We're going to return a positive `ret'
 				 * here due to non-zero-length I/O, so there's
@@ -4251,7 +4239,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	}
 
 	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	/*
@@ -4324,7 +4311,6 @@ out_stop:
 	ext4_journal_stop(handle);
 out_dio:
 	up_write(&EXT4_I(inode)->i_mmap_sem);
-	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	inode_unlock(inode);
 	return ret;
@@ -4746,6 +4732,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		goto bad_inode;
 	raw_inode = ext4_raw_inode(&iloc);
 
+	if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
+		EXT4_ERROR_INODE(inode, "root inode unallocated");
+		ret = -EFSCORRUPTED;
+		goto bad_inode;
+	}
+
 	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
 		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
 		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
@@ -5506,9 +5498,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		 */
 		if (orphan) {
 			if (!ext4_should_journal_data(inode)) {
-				ext4_inode_block_unlocked_dio(inode);
 				inode_dio_wait(inode);
-				ext4_inode_resume_unlocked_dio(inode);
 			} else
 				ext4_wait_for_tail_page_commit(inode);
 		}
@@ -5999,7 +5989,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 		return -EROFS;
 
 	/* Wait for all existing dio workers */
-	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
 	/*
@@ -6015,7 +6004,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 		err = filemap_write_and_wait(inode->i_mapping);
 		if (err < 0) {
 			up_write(&EXT4_I(inode)->i_mmap_sem);
-			ext4_inode_resume_unlocked_dio(inode);
 			return err;
 		}
 	}
@@ -6038,7 +6026,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 		if (err < 0) {
 			jbd2_journal_unlock_updates(journal);
 			percpu_up_write(&sbi->s_journal_flag_rwsem);
-			ext4_inode_resume_unlocked_dio(inode);
 			return err;
 		}
 		ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
@@ -6050,7 +6037,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 
 	if (val)
 		up_write(&EXT4_I(inode)->i_mmap_sem);
-	ext4_inode_resume_unlocked_dio(inode);
 
 	/* Finally we can mark the inode as dirty. */
 
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7e99ad02f1ba..a7074115d6f6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -124,8 +124,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	truncate_inode_pages(&inode_bl->i_data, 0);
 
 	/* Wait for all existing dio workers */
-	ext4_inode_block_unlocked_dio(inode);
-	ext4_inode_block_unlocked_dio(inode_bl);
 	inode_dio_wait(inode);
 	inode_dio_wait(inode_bl);
 
@@ -186,8 +184,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
 	ext4_double_up_write_data_sem(inode, inode_bl);
 
 journal_err_out:
-	ext4_inode_resume_unlocked_dio(inode);
-	ext4_inode_resume_unlocked_dio(inode_bl);
 	unlock_two_nondirectories(inode, inode_bl);
 	iput(inode_bl);
 	return err;
@@ -481,6 +477,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
 		return 0;
 
 	ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
+	trace_ext4_shutdown(sb, flags);
 
 	switch (flags) {
 	case EXT4_GOING_FLAGS_DEFAULT:
@@ -492,15 +489,13 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
 		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
 		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
 			(void) ext4_force_commit(sb);
-			jbd2_journal_abort(sbi->s_journal, 0);
+			jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
 		}
 		break;
 	case EXT4_GOING_FLAGS_NOLOGFLUSH:
 		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
-		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
-			msleep(100);
-			jbd2_journal_abort(sbi->s_journal, 0);
-		}
+		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
+			jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
 		break;
 	default:
 		return -EINVAL;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index b96e4bd3b3ec..8e17efdcbf11 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -601,8 +601,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 	lock_two_nondirectories(orig_inode, donor_inode);
 
 	/* Wait for all existing dio workers */
-	ext4_inode_block_unlocked_dio(orig_inode);
-	ext4_inode_block_unlocked_dio(donor_inode);
 	inode_dio_wait(orig_inode);
 	inode_dio_wait(donor_inode);
 
@@ -693,8 +691,6 @@ out:
 	ext4_ext_drop_refs(path);
 	kfree(path);
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
-	ext4_inode_resume_unlocked_dio(orig_inode);
-	ext4_inode_resume_unlocked_dio(donor_inode);
 	unlock_two_nondirectories(orig_inode, donor_inode);
 
 	return ret;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 39bf464c35f1..185f7e61f4cf 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -101,15 +101,13 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
  *   i_data_sem (rw)
  *
  * truncate:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- *   i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- *   transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ *   i_data_sem (rw)
  *
  * direct IO:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
- *   transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
  *
  * writepages:
  * transaction start -> page lock(s) -> i_data_sem (rw)
@@ -448,6 +446,7 @@ void __ext4_error(struct super_block *sb, const char *function,
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 		return;
 
+	trace_ext4_error(sb, function, line);
 	if (ext4_error_ratelimit(sb)) {
 		va_start(args, fmt);
 		vaf.fmt = fmt;
@@ -472,6 +471,7 @@ void __ext4_error_inode(struct inode *inode, const char *function,
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return;
 
+	trace_ext4_error(inode->i_sb, function, line);
 	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 	es->s_last_error_block = cpu_to_le64(block);
 	if (ext4_error_ratelimit(inode->i_sb)) {
@@ -507,6 +507,7 @@ void __ext4_error_file(struct file *file, const char *function,
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return;
 
+	trace_ext4_error(inode->i_sb, function, line);
 	es = EXT4_SB(inode->i_sb)->s_es;
 	es->s_last_error_ino = cpu_to_le32(inode->i_ino);
 	if (ext4_error_ratelimit(inode->i_sb)) {
@@ -719,6 +720,7 @@ __acquires(bitlock)
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
 		return;
 
+	trace_ext4_error(sb, function, line);
 	es->s_last_error_ino = cpu_to_le32(ino);
 	es->s_last_error_block = cpu_to_le64(block);
 	__save_error_info(sb, function, line);
@@ -2019,7 +2021,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
-	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+	int def_errors, def_mount_opt = sbi->s_def_mount_opt;
 	const struct mount_opts *m;
 	char sep = nodefs ? '\n' : ',';
 
@@ -2034,7 +2036,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
 		    (m->flags & MOPT_CLEAR_ERR))
 			continue;
-		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
+		if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
 			continue; /* skip if same as the default */
 		if ((want_set &&
 		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
@@ -2068,7 +2070,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		SEQ_OPTS_PUTS("i_version");
 	if (nodefs || sbi->s_stripe)
 		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
-	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+	if (nodefs || EXT4_MOUNT_DATA_FLAGS &
+			(sbi->s_mount_opt ^ def_mount_opt)) {
 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
 			SEQ_OPTS_PUTS("data=journal");
 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -2081,7 +2084,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
 			       sbi->s_inode_readahead_blks);
 
-	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+	if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
 		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
 		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
 	if (nodefs || sbi->s_max_dir_size_kb)
@@ -2333,6 +2336,8 @@ static int ext4_check_descriptors(struct super_block *sb,
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 				 "Block bitmap for group %u overlaps "
 				 "superblock", i);
+			if (!sb_rdonly(sb))
+				return 0;
 		}
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2345,6 +2350,8 @@ static int ext4_check_descriptors(struct super_block *sb,
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 				 "Inode bitmap for group %u overlaps "
 				 "superblock", i);
+			if (!sb_rdonly(sb))
+				return 0;
 		}
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2357,6 +2364,8 @@ static int ext4_check_descriptors(struct super_block *sb,
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 				 "Inode table for group %u overlaps "
 				 "superblock", i);
+			if (!sb_rdonly(sb))
+				return 0;
 		}
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
@@ -3490,15 +3499,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* Load the checksum driver */
-	if (ext4_has_feature_metadata_csum(sb) ||
-	    ext4_has_feature_ea_inode(sb)) {
-		sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
-		if (IS_ERR(sbi->s_chksum_driver)) {
-			ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
-			ret = PTR_ERR(sbi->s_chksum_driver);
-			sbi->s_chksum_driver = NULL;
-			goto failed_mount;
-		}
+	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+	if (IS_ERR(sbi->s_chksum_driver)) {
+		ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
+		ret = PTR_ERR(sbi->s_chksum_driver);
+		sbi->s_chksum_driver = NULL;
+		goto failed_mount;
 	}
 
 	/* Check superblock checksum */
@@ -3660,6 +3666,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
 				 "using the ext4 subsystem");
 		else {
+			/*
+			 * If we're probing be silent, if this looks like
+			 * it's actually an ext[34] filesystem.
+			 */
+			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+				goto failed_mount;
 			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
 				 "to feature incompatibilities");
 			goto failed_mount;
@@ -3671,6 +3683,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
 				 "using the ext4 subsystem");
 		else {
+			/*
+			 * If we're probing be silent, if this looks like
+			 * it's actually an ext4 filesystem.
+			 */
+			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+				goto failed_mount;
 			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
 				 "to feature incompatibilities");
 			goto failed_mount;
@@ -4094,10 +4112,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		 * cope, else JOURNAL_DATA
 		 */
 		if (jbd2_journal_check_available_features
-		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
+		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
 			set_opt(sb, ORDERED_DATA);
-		else
+			sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+		} else {
 			set_opt(sb, JOURNAL_DATA);
+			sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+		}
 		break;
 
 	case EXT4_MOUNT_ORDERED_DATA:
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 1205261f130c..9ebd26c957c2 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -49,8 +49,7 @@ struct ext4_attr {
 	} u;
 };
 
-static ssize_t session_write_kbytes_show(struct ext4_attr *a,
-					 struct ext4_sb_info *sbi, char *buf)
+static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 {
 	struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
@@ -61,8 +60,7 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
 			 sbi->s_sectors_written_start) >> 1);
 }
 
-static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
-					  struct ext4_sb_info *sbi, char *buf)
+static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 {
 	struct super_block *sb = sbi->s_buddy_cache->i_sb;
 
@@ -74,8 +72,7 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
-static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
-					  struct ext4_sb_info *sbi,
+static ssize_t inode_readahead_blks_store(struct ext4_sb_info *sbi,
 					  const char *buf, size_t count)
 {
 	unsigned long t;
@@ -92,8 +89,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 	return count;
 }
 
-static ssize_t reserved_clusters_store(struct ext4_attr *a,
-				   struct ext4_sb_info *sbi,
+static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
 				   const char *buf, size_t count)
 {
 	unsigned long long val;
@@ -109,8 +105,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
 	return count;
 }
 
-static ssize_t trigger_test_error(struct ext4_attr *a,
-				  struct ext4_sb_info *sbi,
+static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 				  const char *buf, size_t count)
 {
 	int len = count;
@@ -268,9 +263,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 				(s64) EXT4_C2B(sbi,
 		       percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
 	case attr_session_write_kbytes:
-		return session_write_kbytes_show(a, sbi, buf);
+		return session_write_kbytes_show(sbi, buf);
 	case attr_lifetime_write_kbytes:
-		return lifetime_write_kbytes_show(a, sbi, buf);
+		return lifetime_write_kbytes_show(sbi, buf);
 	case attr_reserved_clusters:
 		return snprintf(buf, PAGE_SIZE, "%llu\n",
 				(unsigned long long)
@@ -306,7 +301,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 
 	switch (a->attr_id) {
 	case attr_reserved_clusters:
-		return reserved_clusters_store(a, sbi, buf, len);
+		return reserved_clusters_store(sbi, buf, len);
 	case attr_pointer_ui:
 		if (!ptr)
 			return 0;
@@ -316,9 +311,9 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 		*((unsigned int *) ptr) = t;
 		return len;
 	case attr_inode_readahead:
-		return inode_readahead_blks_store(a, sbi, buf, len);
+		return inode_readahead_blks_store(sbi, buf, len);
 	case attr_trigger_test_error:
-		return trigger_test_error(a, sbi, buf, len);
+		return trigger_test_error(sbi, buf, len);
 	}
 	return 0;
 }
@@ -330,13 +325,6 @@ static void ext4_sb_release(struct kobject *kobj)
 	complete(&sbi->s_kobj_unregister);
 }
 
-static void ext4_kset_release(struct kobject *kobj)
-{
-	struct kset *kset = container_of(kobj, struct kset, kobj);
-
-	kfree(kset);
-}
-
 static const struct sysfs_ops ext4_attr_ops = {
 	.show	= ext4_attr_show,
 	.store	= ext4_attr_store,
@@ -348,19 +336,14 @@ static struct kobj_type ext4_sb_ktype = {
 	.release	= ext4_sb_release,
 };
 
-static struct kobj_type ext4_ktype = {
-	.sysfs_ops	= &ext4_attr_ops,
-	.release	= ext4_kset_release,
-};
-
-static struct kset *ext4_kset;
-
 static struct kobj_type ext4_feat_ktype = {
 	.default_attrs	= ext4_feat_attrs,
 	.sysfs_ops	= &ext4_attr_ops,
 	.release	= (void (*)(struct kobject *))kfree,
 };
 
+static struct kobject *ext4_root;
+
 static struct kobject *ext4_feat;
 
 #define PROC_FILE_SHOW_DEFN(name) \
@@ -398,9 +381,8 @@ int ext4_register_sysfs(struct super_block *sb)
 	const struct ext4_proc_files *p;
 	int err;
 
-	sbi->s_kobj.kset = ext4_kset;
 	init_completion(&sbi->s_kobj_unregister);
-	err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, NULL,
+	err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root,
 				   "%s", sb->s_id);
 	if (err) {
 		kobject_put(&sbi->s_kobj);
@@ -436,26 +418,18 @@ int __init ext4_init_sysfs(void)
 {
 	int ret;
 
-	ext4_kset = kzalloc(sizeof(*ext4_kset), GFP_KERNEL);
-	if (!ext4_kset)
+	ext4_root = kobject_create_and_add("ext4", fs_kobj);
+	if (!ext4_root)
 		return -ENOMEM;
 
-	kobject_set_name(&ext4_kset->kobj, "ext4");
-	ext4_kset->kobj.parent = fs_kobj;
-	ext4_kset->kobj.ktype = &ext4_ktype;
-	ret = kset_register(ext4_kset);
-	if (ret)
-		goto kset_err;
-
 	ext4_feat = kzalloc(sizeof(*ext4_feat), GFP_KERNEL);
 	if (!ext4_feat) {
 		ret = -ENOMEM;
-		goto kset_err;
+		goto root_err;
 	}
 
-	ext4_feat->kset = ext4_kset;
 	ret = kobject_init_and_add(ext4_feat, &ext4_feat_ktype,
-				   NULL, "features");
+				   ext4_root, "features");
 	if (ret)
 		goto feat_err;
 
@@ -464,17 +438,19 @@ int __init ext4_init_sysfs(void)
 
 feat_err:
 	kobject_put(ext4_feat);
-kset_err:
-	kset_unregister(ext4_kset);
-	ext4_kset = NULL;
+	ext4_feat = NULL;
+root_err:
+	kobject_put(ext4_root);
+	ext4_root = NULL;
 	return ret;
 }
 
 void ext4_exit_sysfs(void)
 {
 	kobject_put(ext4_feat);
-	kset_unregister(ext4_kset);
-	ext4_kset = NULL;
+	ext4_feat = NULL;
+	kobject_put(ext4_root);
+	ext4_root = NULL;
 	remove_proc_entry(proc_dirname, NULL);
 	ext4_proc_root = NULL;
 }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 63656dbafdc4..499cb4b1fbd2 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -195,10 +195,13 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
 
 	/* Check the values */
 	while (!IS_LAST_ENTRY(entry)) {
-		if (entry->e_value_size != 0 &&
-		    entry->e_value_inum == 0) {
+		u32 size = le32_to_cpu(entry->e_value_size);
+
+		if (size > EXT4_XATTR_SIZE_MAX)
+			return -EFSCORRUPTED;
+
+		if (size != 0 && entry->e_value_inum == 0) {
 			u16 offs = le16_to_cpu(entry->e_value_offs);
-			u32 size = le32_to_cpu(entry->e_value_size);
 			void *value;
 
 			/*
@@ -222,25 +225,36 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
 }
 
 static inline int
-ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
+__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
+			 const char *function, unsigned int line)
 {
-	int error;
+	int error = -EFSCORRUPTED;
 
 	if (buffer_verified(bh))
 		return 0;
 
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
-		return -EFSCORRUPTED;
+		goto errout;
+	error = -EFSBADCRC;
 	if (!ext4_xattr_block_csum_verify(inode, bh))
-		return -EFSBADCRC;
+		goto errout;
 	error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
 					 bh->b_data);
-	if (!error)
+errout:
+	if (error)
+		__ext4_error_inode(inode, function, line, 0,
+				   "corrupted xattr block %llu",
+				   (unsigned long long) bh->b_blocknr);
+	else
 		set_buffer_verified(bh);
 	return error;
 }
 
+#define ext4_xattr_check_block(inode, bh) \
+	__ext4_xattr_check_block((inode), (bh),  __func__, __LINE__)
+
+
 static int
 __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
 			 void *end, const char *function, unsigned int line)
@@ -262,18 +276,22 @@ errout:
 	__xattr_check_inode((inode), (header), (end), __func__, __LINE__)
 
 static int
-ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
-		      const char *name, int sorted)
+xattr_find_entry(struct inode *inode, struct ext4_xattr_entry **pentry,
+		 void *end, int name_index, const char *name, int sorted)
 {
-	struct ext4_xattr_entry *entry;
+	struct ext4_xattr_entry *entry, *next;
 	size_t name_len;
 	int cmp = 1;
 
 	if (name == NULL)
 		return -EINVAL;
 	name_len = strlen(name);
-	entry = *pentry;
-	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+	for (entry = *pentry; !IS_LAST_ENTRY(entry); entry = next) {
+		next = EXT4_XATTR_NEXT(entry);
+		if ((void *) next >= end) {
+			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
+			return -EFSCORRUPTED;
+		}
 		cmp = name_index - entry->e_name_index;
 		if (!cmp)
 			cmp = name_len - entry->e_name_len;
@@ -495,6 +513,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 	struct buffer_head *bh = NULL;
 	struct ext4_xattr_entry *entry;
 	size_t size;
+	void *end;
 	int error;
 	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
 
@@ -511,20 +530,20 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-	if (ext4_xattr_check_block(inode, bh)) {
-		EXT4_ERROR_INODE(inode, "bad block %llu",
-				 EXT4_I(inode)->i_file_acl);
-		error = -EFSCORRUPTED;
+	error = ext4_xattr_check_block(inode, bh);
+	if (error)
 		goto cleanup;
-	}
 	ext4_xattr_block_cache_insert(ea_block_cache, bh);
 	entry = BFIRST(bh);
-	error = ext4_xattr_find_entry(&entry, name_index, name, 1);
+	end = bh->b_data + bh->b_size;
+	error = xattr_find_entry(inode, &entry, end, name_index, name, 1);
 	if (error)
 		goto cleanup;
 	size = le32_to_cpu(entry->e_value_size);
+	error = -ERANGE;
+	if (unlikely(size > EXT4_XATTR_SIZE_MAX))
+		goto cleanup;
 	if (buffer) {
-		error = -ERANGE;
 		if (size > buffer_size)
 			goto cleanup;
 		if (entry->e_value_inum) {
@@ -533,8 +552,12 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
 			if (error)
 				goto cleanup;
 		} else {
-			memcpy(buffer, bh->b_data +
-			       le16_to_cpu(entry->e_value_offs), size);
+			u16 offset = le16_to_cpu(entry->e_value_offs);
+			void *p = bh->b_data + offset;
+
+			if (unlikely(p + size > end))
+				goto cleanup;
+			memcpy(buffer, p, size);
 		}
 	}
 	error = size;
@@ -568,12 +591,14 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 	if (error)
 		goto cleanup;
 	entry = IFIRST(header);
-	error = ext4_xattr_find_entry(&entry, name_index, name, 0);
+	error = xattr_find_entry(inode, &entry, end, name_index, name, 0);
 	if (error)
 		goto cleanup;
 	size = le32_to_cpu(entry->e_value_size);
+	error = -ERANGE;
+	if (unlikely(size > EXT4_XATTR_SIZE_MAX))
+		goto cleanup;
 	if (buffer) {
-		error = -ERANGE;
 		if (size > buffer_size)
 			goto cleanup;
 		if (entry->e_value_inum) {
@@ -582,8 +607,12 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 			if (error)
 				goto cleanup;
 		} else {
-			memcpy(buffer, (void *)IFIRST(header) +
-			       le16_to_cpu(entry->e_value_offs), size);
+			u16 offset = le16_to_cpu(entry->e_value_offs);
+			void *p = (void *)IFIRST(header) + offset;
+
+			if (unlikely(p + size > end))
+				goto cleanup;
+			memcpy(buffer, p, size);
 		}
 	}
 	error = size;
@@ -676,12 +705,9 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 		goto cleanup;
 	ea_bdebug(bh, "b_count=%d, refcount=%d",
 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
-	if (ext4_xattr_check_block(inode, bh)) {
-		EXT4_ERROR_INODE(inode, "bad block %llu",
-				 EXT4_I(inode)->i_file_acl);
-		error = -EFSCORRUPTED;
+	error = ext4_xattr_check_block(inode, bh);
+	if (error)
 		goto cleanup;
-	}
 	ext4_xattr_block_cache_insert(EA_BLOCK_CACHE(inode), bh);
 	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
 
@@ -808,10 +834,9 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
 			goto out;
 		}
 
-		if (ext4_xattr_check_block(inode, bh)) {
-			ret = -EFSCORRUPTED;
+		ret = ext4_xattr_check_block(inode, bh);
+		if (ret)
 			goto out;
-		}
 
 		for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
 		     entry = EXT4_XATTR_NEXT(entry))
@@ -1793,19 +1818,16 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
 		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
 			atomic_read(&(bs->bh->b_count)),
 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
-		if (ext4_xattr_check_block(inode, bs->bh)) {
-			EXT4_ERROR_INODE(inode, "bad block %llu",
-					 EXT4_I(inode)->i_file_acl);
-			error = -EFSCORRUPTED;
+		error = ext4_xattr_check_block(inode, bs->bh);
+		if (error)
 			goto cleanup;
-		}
 		/* Find the named attribute. */
 		bs->s.base = BHDR(bs->bh);
 		bs->s.first = BFIRST(bs->bh);
 		bs->s.end = bs->bh->b_data + bs->bh->b_size;
 		bs->s.here = bs->s.first;
-		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
-					      i->name, 1);
+		error = xattr_find_entry(inode, &bs->s.here, bs->s.end,
+					 i->name_index, i->name, 1);
 		if (error && error != -ENODATA)
 			goto cleanup;
 		bs->s.not_found = error;
@@ -2164,8 +2186,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 		if (error)
 			return error;
 		/* Find the named attribute. */
-		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
-					      i->name, 0);
+		error = xattr_find_entry(inode, &is->s.here, is->s.end,
+					 i->name_index, i->name, 0);
 		if (error && error != -ENODATA)
 			return error;
 		is->s.not_found = error;
@@ -2721,13 +2743,9 @@ retry:
 		error = -EIO;
 		if (!bh)
 			goto cleanup;
-		if (ext4_xattr_check_block(inode, bh)) {
-			EXT4_ERROR_INODE(inode, "bad block %llu",
-					 EXT4_I(inode)->i_file_acl);
-			error = -EFSCORRUPTED;
-			brelse(bh);
+		error = ext4_xattr_check_block(inode, bh);
+		if (error)
 			goto cleanup;
-		}
 		base = BHDR(bh);
 		end = bh->b_data + bh->b_size;
 		min_offs = end - base;
@@ -2884,11 +2902,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 			goto cleanup;
 		}
 		error = ext4_xattr_check_block(inode, bh);
-		if (error) {
-			EXT4_ERROR_INODE(inode, "bad block %llu (error %d)",
-					 EXT4_I(inode)->i_file_acl, error);
+		if (error)
 			goto cleanup;
-		}
 
 		if (ext4_has_feature_ea_inode(inode->i_sb)) {
 			for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index dd54c4f995c8..f39cad2abe2a 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -71,6 +71,17 @@ struct ext4_xattr_entry {
 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
 
 /*
+ * XATTR_SIZE_MAX is currently 64k, but for the purposes of checking
+ * for file system consistency errors, we use a somewhat bigger value.
+ * This allows XATTR_SIZE_MAX to grow in the future, but by using this
+ * instead of INT_MAX for certain consistency checks, we don't need to
+ * worry about arithmetic overflows.  (Actually XATTR_SIZE_MAX is
+ * defined in include/uapi/linux/limits.h, so changing it is going
+ * not going to be trivial....)
+ */
+#define EXT4_XATTR_SIZE_MAX (1 << 24)
+
+/*
  * The minimum size of EA value when you start storing it in an external inode
  * size of block - size of header - size of 1 entry - 4 null bytes
 */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 2f725b4a386b..f58716567972 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -940,13 +940,13 @@ failed:
 }
 
 /**
- * gfs2_set_page_dirty - Page dirtying function
+ * jdata_set_page_dirty - Page dirtying function
  * @page: The page to dirty
  *
  * Returns: 1 if it dirtyed the page, or 0 otherwise
  */
  
-static int gfs2_set_page_dirty(struct page *page)
+static int jdata_set_page_dirty(struct page *page)
 {
 	SetPageChecked(page);
 	return __set_page_dirty_buffers(page);
@@ -1214,7 +1214,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
 	.readpages = gfs2_readpages,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_buffers,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
@@ -1231,7 +1231,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
 	.readpages = gfs2_readpages,
 	.write_begin = gfs2_write_begin,
 	.write_end = gfs2_write_end,
-	.set_page_dirty = gfs2_set_page_dirty,
+	.set_page_dirty = jdata_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
 	.releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 51f940e76c5e..685c305cbeb6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -491,14 +491,12 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct super_block *sb = sdp->sd_vfs;
 	struct buffer_head *dibh = mp->mp_bh[0];
 	u64 bn;
 	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = mp->mp_fheight - 1;
-	int ret;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
@@ -607,15 +605,6 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 			iomap->flags |= IOMAP_F_NEW;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
-			if (flags & IOMAP_ZERO) {
-				ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
-						       dblks, GFP_NOFS);
-				if (ret) {
-					fs_err(sdp,
-					       "Failed to zero data buffers\n");
-					flags &= ~IOMAP_ZERO;
-				}
-			}
 			break;
 		}
 	} while (iomap->addr == IOMAP_NULL_ADDR);
@@ -812,15 +801,22 @@ do_alloc:
 }
 
 /**
- * gfs2_block_map - Map a block from an inode to a disk block
+ * gfs2_block_map - Map one or more blocks of an inode to a disk block
  * @inode: The inode
  * @lblock: The logical block number
  * @bh_map: The bh to be mapped
  * @create: True if its ok to alloc blocks to satify the request
  *
- * Sets buffer_mapped() if successful, sets buffer_boundary() if a
- * read of metadata will be required before the next block can be
- * mapped. Sets buffer_new() if new blocks were allocated.
+ * The size of the requested mapping is defined in bh_map->b_size.
+ *
+ * Clears buffer_mapped(bh_map) and leaves bh_map->b_size unchanged
+ * when @lblock is not mapped.  Sets buffer_mapped(bh_map) and
+ * bh_map->b_size to indicate the size of the mapping when @lblock and
+ * successive blocks are mapped, up to the requested size.
+ *
+ * Sets buffer_boundary() if a read of metadata will be required
+ * before the next block can be mapped. Sets buffer_new() if new
+ * blocks were allocated.
  *
  * Returns: errno
  */
@@ -839,8 +835,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
 
 	if (create)
 		flags |= IOMAP_WRITE;
-	if (buffer_zeronew(bh_map))
-		flags |= IOMAP_ZERO;
 	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
 			       bh_map->b_size, flags, &iomap);
 	if (ret) {
@@ -1344,6 +1338,7 @@ static inline bool walk_done(struct gfs2_sbd *sdp,
 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	u64 maxsize = sdp->sd_heightsize[ip->i_height];
 	struct metapath mp = {};
 	struct buffer_head *dibh, *bh;
 	struct gfs2_holder rd_gh;
@@ -1359,6 +1354,14 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 	u64 prev_bnr = 0;
 	__be64 *start, *end;
 
+	if (offset >= maxsize) {
+		/*
+		 * The starting point lies beyond the allocated meta-data;
+		 * there are no blocks do deallocate.
+		 */
+		return 0;
+	}
+
 	/*
 	 * The start position of the hole is defined by lblock, start_list, and
 	 * start_aligned.  The end position of the hole is defined by lend,
@@ -1372,7 +1375,6 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 	 */
 
 	if (length) {
-		u64 maxsize = sdp->sd_heightsize[ip->i_height];
 		u64 end_offset = offset + length;
 		u64 lend;
 
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 7c21aea0266b..d9fb0ad6cc30 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1940,7 +1940,6 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 {
 	struct buffer_head *bh;
 	struct gfs2_dirent *dent;
-	int error;
 
 	dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
 	if (!dent) {
@@ -1953,18 +1952,10 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
 	gfs2_trans_add_meta(dip->i_gl, bh);
 	gfs2_inum_out(nip, dent);
 	dent->de_type = cpu_to_be16(new_type);
-
-	if (dip->i_diskflags & GFS2_DIF_EXHASH) {
-		brelse(bh);
-		error = gfs2_meta_inode_buffer(dip, &bh);
-		if (error)
-			return error;
-		gfs2_trans_add_meta(dip->i_gl, bh);
-	}
+	brelse(bh);
 
 	dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
-	gfs2_dinode_out(dip, bh->b_data);
-	brelse(bh);
+	mark_inode_dirty_sync(&dip->i_inode);
 	return 0;
 }
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4f88e201b3f0..4b71f021a9e2 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -729,11 +729,12 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			   int mode)
 {
+	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
+	loff_t end = offset + len;
 	struct buffer_head *dibh;
+	struct iomap iomap;
 	int error;
-	unsigned int nr_blks;
-	sector_t lblock = offset >> inode->i_blkbits;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (unlikely(error))
@@ -747,21 +748,19 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 	}
 
-	while (len) {
-		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-		bh_map.b_size = len;
-		set_buffer_zeronew(&bh_map);
-
-		error = gfs2_block_map(inode, lblock, &bh_map, 1);
-		if (unlikely(error))
+	while (offset < end) {
+		error = gfs2_iomap_begin(inode, offset, end - offset,
+					 IOMAP_WRITE, &iomap);
+		if (error)
 			goto out;
-		len -= bh_map.b_size;
-		nr_blks = bh_map.b_size >> inode->i_blkbits;
-		lblock += nr_blks;
-		if (!buffer_new(&bh_map))
+		offset = iomap.offset + iomap.length;
+		if (iomap.type != IOMAP_HOLE)
 			continue;
-		if (unlikely(!buffer_zeronew(&bh_map))) {
-			error = -EIO;
+		error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits,
+					 iomap.length >> inode->i_blkbits,
+					 GFP_NOFS);
+		if (error) {
+			fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
 			goto out;
 		}
 	}
@@ -809,7 +808,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-	loff_t bytes, max_bytes, max_blks = UINT_MAX;
+	loff_t bytes, max_bytes, max_blks;
 	int error;
 	const loff_t pos = offset;
 	const loff_t count = len;
@@ -861,7 +860,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 			return error;
 		/* ap.allowed tells us how many blocks quota will allow
 		 * us to write. Check if this reduces max_blks */
-		if (ap.allowed && ap.allowed < max_blks)
+		max_blks = UINT_MAX;
+		if (ap.allowed)
 			max_blks = ap.allowed;
 
 		error = gfs2_inplace_reserve(ip, &ap);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e0557b8a590a..1b6b1e3f5caf 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -130,15 +130,12 @@ static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
 enum gfs2_state_bits {
 	BH_Pinned = BH_PrivateStart,
 	BH_Escaped = BH_PrivateStart + 1,
-	BH_Zeronew = BH_PrivateStart + 2,
 };
 
 BUFFER_FNS(Pinned, pinned)
 TAS_BUFFER_FNS(Pinned, pinned)
 BUFFER_FNS(Escaped, escaped)
 TAS_BUFFER_FNS(Escaped, escaped)
-BUFFER_FNS(Zeronew, zeronew)
-TAS_BUFFER_FNS(Zeronew, zeronew)
 
 struct gfs2_bufdata {
 	struct buffer_head *bd_bh;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 59e0560180ec..8700eb815638 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1326,19 +1326,11 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
 static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
 			    int dir_rename)
 {
-	int error;
-	struct buffer_head *dibh;
-
 	if (dir_rename)
 		return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		return error;
 	ip->i_inode.i_ctime = current_time(&ip->i_inode);
-	gfs2_trans_add_meta(ip->i_gl, dibh);
-	gfs2_dinode_out(ip, dibh->b_data);
-	brelse(dibh);
+	mark_inode_dirty_sync(&ip->i_inode);
 	return 0;
 }
 
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index cf6b46247df4..0248835625f1 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -73,7 +73,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
  *
  */
 
-void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
+static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 {
 	bd->bd_tr = NULL;
 	list_del_init(&bd->bd_ail_st_list);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 93b52ac1ca1f..1862e310a067 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -70,7 +70,6 @@ extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
 extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 			   u32 type);
 extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
-extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
 extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
 
 extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 5e47c935a515..836f29480be6 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -45,6 +45,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	int ret;
+
+	ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
 	if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
 		return 0;
 	ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index b6b258998bcd..d8b622c375ab 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -15,6 +15,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/crc32c.h>
+#include <linux/ktime.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -409,12 +410,13 @@ void gfs2_recover_func(struct work_struct *work)
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	struct gfs2_log_header_host head;
 	struct gfs2_holder j_gh, ji_gh, thaw_gh;
-	unsigned long t;
+	ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
 	int ro = 0;
 	unsigned int pass;
 	int error;
 	int jlocked = 0;
 
+	t_start = ktime_get();
 	if (sdp->sd_args.ar_spectator ||
 	    (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) {
 		fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
@@ -446,6 +448,7 @@ void gfs2_recover_func(struct work_struct *work)
 		fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 	}
 
+	t_jlck = ktime_get();
 	fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 
 	error = gfs2_jdesc_check(jd);
@@ -455,13 +458,12 @@ void gfs2_recover_func(struct work_struct *work)
 	error = gfs2_find_jhead(jd, &head);
 	if (error)
 		goto fail_gunlock_ji;
+	t_jhd = ktime_get();
 
 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 		fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 			jd->jd_jid);
 
-		t = jiffies;
-
 		/* Acquire a shared hold on the freeze lock */
 
 		error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
@@ -495,6 +497,7 @@ void gfs2_recover_func(struct work_struct *work)
 			goto fail_gunlock_thaw;
 		}
 
+		t_tlck = ktime_get();
 		fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
 
 		for (pass = 0; pass < 2; pass++) {
@@ -509,9 +512,14 @@ void gfs2_recover_func(struct work_struct *work)
 		clean_journal(jd, &head);
 
 		gfs2_glock_dq_uninit(&thaw_gh);
-		t = DIV_ROUND_UP(jiffies - t, HZ);
-		fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
-			jd->jd_jid, t);
+		t_rep = ktime_get();
+		fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
+			"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
+			jd->jd_jid, ktime_ms_delta(t_rep, t_start),
+			ktime_ms_delta(t_jlck, t_start),
+			ktime_ms_delta(t_jhd, t_jlck),
+			ktime_ms_delta(t_tlck, t_jhd),
+			ktime_ms_delta(t_rep, t_tlck));
 	}
 
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index b9318b49ff8f..cb10b95efe0f 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -515,6 +515,7 @@ TRACE_EVENT(gfs2_iomap_end,
 		__field(	u64,	inum			)
 		__field(	loff_t, offset			)
 		__field(	ssize_t, length			)
+		__field(	sector_t, pblock		)
 		__field(	u16,	flags			)
 		__field(	u16,	type			)
 		__field(	int,	ret			)
@@ -525,16 +526,20 @@ TRACE_EVENT(gfs2_iomap_end,
 		__entry->inum		= ip->i_no_addr;
 		__entry->offset		= iomap->offset;
 		__entry->length		= iomap->length;
+		__entry->pblock		= iomap->addr == IOMAP_NULL_ADDR ? 0 :
+					 (iomap->addr >> ip->i_inode.i_blkbits);
 		__entry->flags		= iomap->flags;
 		__entry->type		= iomap->type;
 		__entry->ret		= ret;
 	),
 
-	TP_printk("%u,%u bmap %llu iomap end %llu/%lu ty:%d flags:%08x rc:%d",
+	TP_printk("%u,%u bmap %llu iomap end %llu/%lu to %llu ty:%d flags:%08x rc:%d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->offset,
-		  (unsigned long)__entry->length, (u16)__entry->type,
+		  (unsigned long)__entry->length,
+		  (long long)__entry->pblock,
+		  (u16)__entry->type,
 		  (u16)__entry->flags, __entry->ret)
 );
 
diff --git a/fs/inode.c b/fs/inode.c
index ef362364d396..b153aeaa61ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,9 +346,8 @@ void inc_nlink(struct inode *inode)
 }
 EXPORT_SYMBOL(inc_nlink);
 
-void address_space_init_once(struct address_space *mapping)
+static void __address_space_init_once(struct address_space *mapping)
 {
-	memset(mapping, 0, sizeof(*mapping));
 	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
 	spin_lock_init(&mapping->tree_lock);
 	init_rwsem(&mapping->i_mmap_rwsem);
@@ -356,6 +355,12 @@ void address_space_init_once(struct address_space *mapping)
 	spin_lock_init(&mapping->private_lock);
 	mapping->i_mmap = RB_ROOT_CACHED;
 }
+
+void address_space_init_once(struct address_space *mapping)
+{
+	memset(mapping, 0, sizeof(*mapping));
+	__address_space_init_once(mapping);
+}
 EXPORT_SYMBOL(address_space_init_once);
 
 /*
@@ -371,7 +376,7 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->i_io_list);
 	INIT_LIST_HEAD(&inode->i_wb_list);
 	INIT_LIST_HEAD(&inode->i_lru);
-	address_space_init_once(&inode->i_data);
+	__address_space_init_once(&inode->i_data);
 	i_size_ordered_init(inode);
 }
 EXPORT_SYMBOL(inode_init_once);
@@ -1533,7 +1538,6 @@ retry:
 	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
 		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
 			atomic_inc(&inode->i_count);
-			inode->i_state &= ~I_DIRTY_TIME;
 			spin_unlock(&inode->i_lock);
 			trace_writeback_lazytime_iput(inode);
 			mark_inode_dirty_sync(inode);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 3fbf48ec2188..dfb057900e79 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -974,7 +974,7 @@ out:
 }
 
 /*
- * This is a variaon of __jbd2_update_log_tail which checks for validity of
+ * This is a variation of __jbd2_update_log_tail which checks for validity of
  * provided log tail and locks j_checkpoint_mutex. So it is safe against races
  * with other threads updating log tail.
  */
@@ -1417,6 +1417,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
 	journal_superblock_t *sb = journal->j_superblock;
 	int ret;
 
+	if (is_journal_aborted(journal))
+		return -EIO;
+
 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
 		  tail_block, tail_tid);
@@ -1483,12 +1486,15 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 void jbd2_journal_update_sb_errno(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
+	int errcode;
 
 	read_lock(&journal->j_state_lock);
-	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
-		  journal->j_errno);
-	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	errcode = journal->j_errno;
 	read_unlock(&journal->j_state_lock);
+	if (errcode == -ESHUTDOWN)
+		errcode = 0;
+	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
+	sb->s_errno    = cpu_to_be32(errcode);
 
 	jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA);
 }
@@ -2105,12 +2111,22 @@ void __jbd2_journal_abort_hard(journal_t *journal)
  * but don't do any other IO. */
 static void __journal_abort_soft (journal_t *journal, int errno)
 {
-	if (journal->j_flags & JBD2_ABORT)
-		return;
+	int old_errno;
 
-	if (!journal->j_errno)
+	write_lock(&journal->j_state_lock);
+	old_errno = journal->j_errno;
+	if (!journal->j_errno || errno == -ESHUTDOWN)
 		journal->j_errno = errno;
 
+	if (journal->j_flags & JBD2_ABORT) {
+		write_unlock(&journal->j_state_lock);
+		if (!old_errno && old_errno != -ESHUTDOWN &&
+		    errno == -ESHUTDOWN)
+			jbd2_journal_update_sb_errno(journal);
+		return;
+	}
+	write_unlock(&journal->j_state_lock);
+
 	__jbd2_journal_abort_hard(journal);
 
 	if (errno) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index f99910b69c78..a4967b27ffb6 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -600,8 +600,8 @@ static int do_one_pass(journal_t *journal,
 						success = -EFSBADCRC;
 						printk(KERN_ERR "JBD2: Invalid "
 						       "checksum recovering "
-						       "block %llu in log\n",
-						       blocknr);
+						       "data block %llu in "
+						       "log\n", blocknr);
 						block_error = 1;
 						goto skip_write;
 					}
diff --git a/fs/namei.c b/fs/namei.c
index 5c2b953c352d..a09419379f5d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1474,43 +1474,36 @@ static struct dentry *lookup_dcache(const struct qstr *name,
 }
 
 /*
- * Call i_op->lookup on the dentry.  The dentry must be negative and
- * unhashed.
- *
- * dir->d_inode->i_mutex must be held
+ * Parent directory has inode locked exclusive.  This is one
+ * and only case when ->lookup() gets called on non in-lookup
+ * dentries - as the matter of fact, this only gets called
+ * when directory is guaranteed to have no in-lookup children
+ * at all.
  */
-static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
-				  unsigned int flags)
-{
-	struct dentry *old;
-
-	/* Don't create child dentry for a dead directory. */
-	if (unlikely(IS_DEADDIR(dir))) {
-		dput(dentry);
-		return ERR_PTR(-ENOENT);
-	}
-
-	old = dir->i_op->lookup(dir, dentry, flags);
-	if (unlikely(old)) {
-		dput(dentry);
-		dentry = old;
-	}
-	return dentry;
-}
-
 static struct dentry *__lookup_hash(const struct qstr *name,
 		struct dentry *base, unsigned int flags)
 {
 	struct dentry *dentry = lookup_dcache(name, base, flags);
+	struct dentry *old;
+	struct inode *dir = base->d_inode;
 
 	if (dentry)
 		return dentry;
 
+	/* Don't create child dentry for a dead directory. */
+	if (unlikely(IS_DEADDIR(dir)))
+		return ERR_PTR(-ENOENT);
+
 	dentry = d_alloc(base, name);
 	if (unlikely(!dentry))
 		return ERR_PTR(-ENOMEM);
 
-	return lookup_real(base->d_inode, dentry, flags);
+	old = dir->i_op->lookup(dir, dentry, flags);
+	if (unlikely(old)) {
+		dput(dentry);
+		dentry = old;
+	}
+	return dentry;
 }
 
 static int lookup_fast(struct nameidata *nd,
diff --git a/fs/sync.c b/fs/sync.c
index 9908a114d506..b54e0541ad89 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,12 +192,8 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 
 	if (!file->f_op->fsync)
 		return -EINVAL;
-	if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
-		spin_lock(&inode->i_lock);
-		inode->i_state &= ~I_DIRTY_TIME;
-		spin_unlock(&inode->i_lock);
+	if (!datasync && (inode->i_state & I_DIRTY_TIME))
 		mark_inode_dirty_sync(inode);
-	}
 	return file->f_op->fsync(file, start, end, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 393b6849aeb3..7bace03dc9dc 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -46,13 +46,13 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
 }
 
 void *
-kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+kmem_alloc_large(size_t size, xfs_km_flags_t flags)
 {
 	unsigned nofs_flag = 0;
 	void	*ptr;
 	gfp_t	lflags;
 
-	ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+	ptr = kmem_alloc(size, flags | KM_MAYFAIL);
 	if (ptr)
 		return ptr;
 
@@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
 		nofs_flag = memalloc_nofs_save();
 
 	lflags = kmem_flags_convert(flags);
-	ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
+	ptr = __vmalloc(size, lflags, PAGE_KERNEL);
 
 	if (flags & KM_NOFS)
 		memalloc_nofs_restore(nofs_flag);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 4b87472f35bc..6023b594ead7 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -71,7 +71,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
 }
 
 extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
+extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
 extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
 static inline void  kmem_free(const void *ptr)
 {
@@ -85,6 +85,12 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
 	return kmem_alloc(size, flags | KM_ZERO);
 }
 
+static inline void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+	return kmem_alloc_large(size, flags | KM_ZERO);
+}
+
 /*
  * Zone interfaces
  */
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 2291f4224e24..03885a968de8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -95,13 +95,13 @@ xfs_ag_resv_critical(
 
 	switch (type) {
 	case XFS_AG_RESV_METADATA:
-		avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
+		avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
 		orig = pag->pag_meta_resv.ar_asked;
 		break;
-	case XFS_AG_RESV_AGFL:
+	case XFS_AG_RESV_RMAPBT:
 		avail = pag->pagf_freeblks + pag->pagf_flcount -
 			pag->pag_meta_resv.ar_reserved;
-		orig = pag->pag_agfl_resv.ar_asked;
+		orig = pag->pag_rmapbt_resv.ar_asked;
 		break;
 	default:
 		ASSERT(0);
@@ -126,10 +126,10 @@ xfs_ag_resv_needed(
 {
 	xfs_extlen_t			len;
 
-	len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
+	len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
 	switch (type) {
 	case XFS_AG_RESV_METADATA:
-	case XFS_AG_RESV_AGFL:
+	case XFS_AG_RESV_RMAPBT:
 		len -= xfs_perag_resv(pag, type)->ar_reserved;
 		break;
 	case XFS_AG_RESV_NONE:
@@ -160,10 +160,11 @@ __xfs_ag_resv_free(
 	if (pag->pag_agno == 0)
 		pag->pag_mount->m_ag_max_usable += resv->ar_asked;
 	/*
-	 * AGFL blocks are always considered "free", so whatever
-	 * was reserved at mount time must be given back at umount.
+	 * RMAPBT blocks come from the AGFL and AGFL blocks are always
+	 * considered "free", so whatever was reserved at mount time must be
+	 * given back at umount.
 	 */
-	if (type == XFS_AG_RESV_AGFL)
+	if (type == XFS_AG_RESV_RMAPBT)
 		oldresv = resv->ar_orig_reserved;
 	else
 		oldresv = resv->ar_reserved;
@@ -185,7 +186,7 @@ xfs_ag_resv_free(
 	int				error;
 	int				err2;
 
-	error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
+	error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
 	err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
 	if (err2 && !error)
 		error = err2;
@@ -284,15 +285,15 @@ xfs_ag_resv_init(
 		}
 	}
 
-	/* Create the AGFL metadata reservation */
-	if (pag->pag_agfl_resv.ar_asked == 0) {
+	/* Create the RMAPBT metadata reservation */
+	if (pag->pag_rmapbt_resv.ar_asked == 0) {
 		ask = used = 0;
 
 		error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
 		if (error)
 			goto out;
 
-		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
+		error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
 		if (error)
 			goto out;
 	}
@@ -304,7 +305,7 @@ xfs_ag_resv_init(
 		return error;
 
 	ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
-	       xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+	       xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
 	       pag->pagf_freeblks + pag->pagf_flcount);
 #endif
 out:
@@ -325,8 +326,10 @@ xfs_ag_resv_alloc_extent(
 	trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
 
 	switch (type) {
-	case XFS_AG_RESV_METADATA:
 	case XFS_AG_RESV_AGFL:
+		return;
+	case XFS_AG_RESV_METADATA:
+	case XFS_AG_RESV_RMAPBT:
 		resv = xfs_perag_resv(pag, type);
 		break;
 	default:
@@ -341,7 +344,7 @@ xfs_ag_resv_alloc_extent(
 
 	len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
 	resv->ar_reserved -= len;
-	if (type == XFS_AG_RESV_AGFL)
+	if (type == XFS_AG_RESV_RMAPBT)
 		return;
 	/* Allocations of reserved blocks only need on-disk sb updates... */
 	xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
@@ -365,8 +368,10 @@ xfs_ag_resv_free_extent(
 	trace_xfs_ag_resv_free_extent(pag, type, len);
 
 	switch (type) {
-	case XFS_AG_RESV_METADATA:
 	case XFS_AG_RESV_AGFL:
+		return;
+	case XFS_AG_RESV_METADATA:
+	case XFS_AG_RESV_RMAPBT:
 		resv = xfs_perag_resv(pag, type);
 		break;
 	default:
@@ -379,7 +384,7 @@ xfs_ag_resv_free_extent(
 
 	leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
 	resv->ar_reserved += leftover;
-	if (type == XFS_AG_RESV_AGFL)
+	if (type == XFS_AG_RESV_RMAPBT)
 		return;
 	/* Freeing into the reserved pool only requires on-disk update... */
 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index 8d6c687deef3..938f2f96c5e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
 void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
 		struct xfs_trans *tp, xfs_extlen_t len);
 
+/*
+ * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
+ * the AGFL, they are allocated one at a time and the reservation updates don't
+ * require a transaction.
+ */
+static inline void
+xfs_ag_resv_rmapbt_alloc(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_alloc_arg	args = {0};
+	struct xfs_perag	*pag;
+
+	args.len = 1;
+	pag = xfs_perag_get(mp, agno);
+	xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
+	xfs_perag_put(pag);
+}
+
+static inline void
+xfs_ag_resv_rmapbt_free(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(mp, agno);
+	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
+	xfs_perag_put(pag);
+}
+
 #endif	/* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c02781a4c091..39387bdd225d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
 
+/*
+ * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
+ * the beginning of the block for a proper header with the location information
+ * and CRC.
+ */
+unsigned int
+xfs_agfl_size(
+	struct xfs_mount	*mp)
+{
+	unsigned int		size = mp->m_sb.sb_sectsize;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		size -= sizeof(struct xfs_agfl);
+
+	return size / sizeof(xfs_agblock_t);
+}
+
 unsigned int
 xfs_refc_block(
 	struct xfs_mount	*mp)
@@ -550,7 +567,7 @@ xfs_agfl_verify(
 	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
 		return __this_address;
 
-	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+	for (i = 0; i < xfs_agfl_size(mp); i++) {
 		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
 		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 			return __this_address;
@@ -1564,7 +1581,6 @@ xfs_alloc_ag_vextent_small(
 	int		*stat)	/* status: 0-freelist, 1-normal/none */
 {
 	struct xfs_owner_info	oinfo;
-	struct xfs_perag	*pag;
 	int		error;
 	xfs_agblock_t	fbno;
 	xfs_extlen_t	flen;
@@ -1616,18 +1632,13 @@ xfs_alloc_ag_vextent_small(
 			/*
 			 * If we're feeding an AGFL block to something that
 			 * doesn't live in the free space, we need to clear
-			 * out the OWN_AG rmap and add the block back to
-			 * the AGFL per-AG reservation.
+			 * out the OWN_AG rmap.
 			 */
 			xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
 			error = xfs_rmap_free(args->tp, args->agbp, args->agno,
 					fbno, 1, &oinfo);
 			if (error)
 				goto error0;
-			pag = xfs_perag_get(args->mp, args->agno);
-			xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
-					args->tp, 1);
-			xfs_perag_put(pag);
 
 			*stat = 0;
 			return 0;
@@ -1911,14 +1922,12 @@ xfs_free_ag_extent(
 	XFS_STATS_INC(mp, xs_freex);
 	XFS_STATS_ADD(mp, xs_freeb, len);
 
-	trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
-			haveleft, haveright);
+	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
 
 	return 0;
 
  error0:
-	trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
-			-1, -1);
+	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
 	if (bno_cur)
 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 	if (cnt_cur)
@@ -2054,6 +2063,93 @@ xfs_alloc_space_available(
 }
 
 /*
+ * Check the agfl fields of the agf for inconsistency or corruption. The purpose
+ * is to detect an agfl header padding mismatch between current and early v5
+ * kernels. This problem manifests as a 1-slot size difference between the
+ * on-disk flcount and the active [first, last] range of a wrapped agfl. This
+ * may also catch variants of agfl count corruption unrelated to padding. Either
+ * way, we'll reset the agfl and warn the user.
+ *
+ * Return true if a reset is required before the agfl can be used, false
+ * otherwise.
+ */
+static bool
+xfs_agfl_needs_reset(
+	struct xfs_mount	*mp,
+	struct xfs_agf		*agf)
+{
+	uint32_t		f = be32_to_cpu(agf->agf_flfirst);
+	uint32_t		l = be32_to_cpu(agf->agf_fllast);
+	uint32_t		c = be32_to_cpu(agf->agf_flcount);
+	int			agfl_size = xfs_agfl_size(mp);
+	int			active;
+
+	/* no agfl header on v4 supers */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return false;
+
+	/*
+	 * The agf read verifier catches severe corruption of these fields.
+	 * Repeat some sanity checks to cover a packed -> unpacked mismatch if
+	 * the verifier allows it.
+	 */
+	if (f >= agfl_size || l >= agfl_size)
+		return true;
+	if (c > agfl_size)
+		return true;
+
+	/*
+	 * Check consistency between the on-disk count and the active range. An
+	 * agfl padding mismatch manifests as an inconsistent flcount.
+	 */
+	if (c && l >= f)
+		active = l - f + 1;
+	else if (c)
+		active = agfl_size - f + l + 1;
+	else
+		active = 0;
+
+	return active != c;
+}
+
+/*
+ * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
+ * agfl content cannot be trusted. Warn the user that a repair is required to
+ * recover leaked blocks.
+ *
+ * The purpose of this mechanism is to handle filesystems affected by the agfl
+ * header padding mismatch problem. A reset keeps the filesystem online with a
+ * relatively minor free space accounting inconsistency rather than suffer the
+ * inevitable crash from use of an invalid agfl block.
+ */
+static void
+xfs_agfl_reset(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+
+	ASSERT(pag->pagf_agflreset);
+	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
+
+	xfs_warn(mp,
+	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
+	       "Please unmount and run xfs_repair.",
+	         pag->pag_agno, pag->pagf_flcount);
+
+	agf->agf_flfirst = 0;
+	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
+	agf->agf_flcount = 0;
+	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
+				    XFS_AGF_FLCOUNT);
+
+	pag->pagf_flcount = 0;
+	pag->pagf_agflreset = false;
+}
+
+/*
  * Decide whether to use this allocation group for this allocation.
  * If so, fix up the btree freelist's size.
  */
@@ -2114,6 +2210,10 @@ xfs_alloc_fix_freelist(
 		}
 	}
 
+	/* reset a padding mismatched agfl before final free space check */
+	if (pag->pagf_agflreset)
+		xfs_agfl_reset(tp, agbp, pag);
+
 	/* If there isn't enough total space or single-extent, reject it. */
 	need = xfs_alloc_min_freelist(mp, pag);
 	if (!xfs_alloc_space_available(args, need, flags))
@@ -2266,10 +2366,11 @@ xfs_alloc_get_freelist(
 	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
 	be32_add_cpu(&agf->agf_flfirst, 1);
 	xfs_trans_brelse(tp, agflbp);
-	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
+	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
 		agf->agf_flfirst = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+	ASSERT(!pag->pagf_agflreset);
 	be32_add_cpu(&agf->agf_flcount, -1);
 	xfs_trans_agflist_delta(tp, -1);
 	pag->pagf_flcount--;
@@ -2377,10 +2478,11 @@ xfs_alloc_put_freelist(
 			be32_to_cpu(agf->agf_seqno), &agflbp)))
 		return error;
 	be32_add_cpu(&agf->agf_fllast, 1);
-	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
+	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
 		agf->agf_fllast = 0;
 
 	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+	ASSERT(!pag->pagf_agflreset);
 	be32_add_cpu(&agf->agf_flcount, 1);
 	xfs_trans_agflist_delta(tp, 1);
 	pag->pagf_flcount++;
@@ -2395,7 +2497,7 @@ xfs_alloc_put_freelist(
 
 	xfs_alloc_log_agf(tp, agbp, logflags);
 
-	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
 
 	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
 	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
@@ -2428,9 +2530,9 @@ xfs_agf_verify(
 	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
 	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
 	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
-	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
-	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
-	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
+	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
+	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
 		return __this_address;
 
 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
@@ -2588,6 +2690,7 @@ xfs_alloc_read_agf(
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
 		pag->pagf_init = 1;
+		pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
 	}
 #ifdef DEBUG
 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 65a0cafe06e4..a311a2414a6b 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -26,6 +26,8 @@ struct xfs_trans;
 
 extern struct workqueue_struct *xfs_alloc_wq;
 
+unsigned int xfs_agfl_size(struct xfs_mount *mp);
+
 /*
  * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
  */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6840b588187e..b451649ba176 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -74,18 +74,13 @@ xfs_allocbt_alloc_block(
 	int			error;
 	xfs_agblock_t		bno;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	/* Allocate the new block from the freelist. If we can't, give up.  */
 	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
 				       &bno, 1);
-	if (error) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+	if (error)
 		return error;
-	}
 
 	if (bno == NULLAGBLOCK) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -95,7 +90,6 @@ xfs_allocbt_alloc_block(
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index daae00ed30c5..3b03d886df66 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1244,8 +1244,9 @@ xfs_iread_extents(
 			xfs_warn(ip->i_mount,
 				"corrupt dinode %Lu, (btree extents).",
 				(unsigned long long) ip->i_ino);
-			XFS_CORRUPTION_ERROR(__func__,
-				XFS_ERRLEVEL_LOW, ip->i_mount, block);
+			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+					__func__, block, sizeof(*block),
+					__this_address);
 			error = -EFSCORRUPTED;
 			goto out_brelse;
 		}
@@ -1261,11 +1262,15 @@ xfs_iread_extents(
 		 */
 		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
 		for (j = 0; j < num_recs; j++, frp++, i++) {
+			xfs_failaddr_t	fa;
+
 			xfs_bmbt_disk_get_all(frp, &new);
-			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
-				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
-						 XFS_ERRLEVEL_LOW, mp);
+			fa = xfs_bmap_validate_extent(ip, whichfork, &new);
+			if (fa) {
 				error = -EFSCORRUPTED;
+				xfs_inode_verifier_error(ip, error,
+						"xfs_iread_extents(2)",
+						frp, sizeof(*frp), fa);
 				goto out_brelse;
 			}
 			xfs_iext_insert(ip, &icur, &new, state);
@@ -6154,3 +6159,39 @@ xfs_bmap_finish_one(
 
 	return error;
 }
+
+/* Check that an inode's extent does not have invalid flags or bad ranges. */
+xfs_failaddr_t
+xfs_bmap_validate_extent(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_bmbt_irec	*irec)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fsblock_t		endfsb;
+	bool			isrt;
+
+	isrt = XFS_IS_REALTIME_INODE(ip);
+	endfsb = irec->br_startblock + irec->br_blockcount - 1;
+	if (isrt) {
+		if (!xfs_verify_rtbno(mp, irec->br_startblock))
+			return __this_address;
+		if (!xfs_verify_rtbno(mp, endfsb))
+			return __this_address;
+	} else {
+		if (!xfs_verify_fsbno(mp, irec->br_startblock))
+			return __this_address;
+		if (!xfs_verify_fsbno(mp, endfsb))
+			return __this_address;
+		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
+		    XFS_FSB_TO_AGNO(mp, endfsb))
+			return __this_address;
+	}
+	if (irec->br_state != XFS_EXT_NORM) {
+		if (whichfork != XFS_DATA_FORK)
+			return __this_address;
+		if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
+			return __this_address;
+	}
+	return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e36d75799cd5..f3be6416260b 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -274,4 +274,7 @@ static inline int xfs_bmap_fork_to_state(int whichfork)
 	}
 }
 
+xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
+		struct xfs_bmbt_irec *irec);
+
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 9faf479aba49..d89d06bea6e3 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -272,10 +272,10 @@ xfs_bmbt_alloc_block(
 		cur->bc_private.b.dfops->dop_low = true;
 	}
 	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
+
 	ASSERT(args.len == 1);
 	cur->bc_private.b.firstblock = args.fsbno;
 	cur->bc_private.b.allocated++;
@@ -286,12 +286,10 @@ xfs_bmbt_alloc_block(
 
 	new->l = cpu_to_be64(args.fsbno);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
  error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 135b8c56d23e..e4505746ccaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -118,18 +118,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
 extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
 		struct xfs_trans *, struct xfs_inode *, int);
 
-/*
- * Check that the extent does not contain an invalid unwritten extent flag.
- */
-static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
-		struct xfs_bmbt_irec *irec)
-{
-	if (irec->br_state == XFS_EXT_NORM)
-		return true;
-	if (whichfork == XFS_DATA_FORK &&
-	    xfs_sb_version_hasextflgbit(&mp->m_sb))
-		return true;
-	return false;
-}
-
 #endif	/* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 79ee4a1951d1..edc0193358a5 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1438,8 +1438,6 @@ xfs_btree_log_keys(
 	int			first,
 	int			last)
 {
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
 
 	if (bp) {
 		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
@@ -1450,8 +1448,6 @@ xfs_btree_log_keys(
 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
 				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
 	}
-
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 }
 
 /*
@@ -1464,15 +1460,12 @@ xfs_btree_log_recs(
 	int			first,
 	int			last)
 {
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
 
 	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
 	xfs_trans_log_buf(cur->bc_tp, bp,
 			  xfs_btree_rec_offset(cur, first),
 			  xfs_btree_rec_offset(cur, last + 1) - 1);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 }
 
 /*
@@ -1485,8 +1478,6 @@ xfs_btree_log_ptrs(
 	int			first,	/* index of first pointer to log */
 	int			last)	/* index of last pointer to log */
 {
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
 
 	if (bp) {
 		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
@@ -1501,7 +1492,6 @@ xfs_btree_log_ptrs(
 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 }
 
 /*
@@ -1543,9 +1533,6 @@ xfs_btree_log_block(
 		XFS_BTREE_LBLOCK_CRC_LEN
 	};
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
-
 	if (bp) {
 		int nbits;
 
@@ -1573,8 +1560,6 @@ xfs_btree_log_block(
 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
 			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
 	}
-
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 }
 
 /*
@@ -1593,9 +1578,6 @@ xfs_btree_increment(
 	int			error;		/* error return value */
 	int			lev;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, level);
-
 	ASSERT(level < cur->bc_nlevels);
 
 	/* Read-ahead to the right at this level. */
@@ -1671,17 +1653,14 @@ xfs_btree_increment(
 		cur->bc_ptrs[lev] = 1;
 	}
 out1:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -1701,9 +1680,6 @@ xfs_btree_decrement(
 	int			lev;
 	union xfs_btree_ptr	ptr;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, level);
-
 	ASSERT(level < cur->bc_nlevels);
 
 	/* Read-ahead to the left at this level. */
@@ -1769,17 +1745,14 @@ xfs_btree_decrement(
 		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
 	}
 out1:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -1881,9 +1854,6 @@ xfs_btree_lookup(
 	union xfs_btree_ptr	*pp;	/* ptr to btree block */
 	union xfs_btree_ptr	ptr;	/* ptr to btree block */
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, dir);
-
 	XFS_BTREE_STATS_INC(cur, lookup);
 
 	/* No such thing as a zero-level tree. */
@@ -1929,7 +1899,6 @@ xfs_btree_lookup(
 				ASSERT(level == 0 && cur->bc_nlevels == 1);
 
 				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
-				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 				*stat = 0;
 				return 0;
 			}
@@ -2004,7 +1973,6 @@ xfs_btree_lookup(
 			if (error)
 				goto error0;
 			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
-			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 			*stat = 1;
 			return 0;
 		}
@@ -2019,11 +1987,9 @@ xfs_btree_lookup(
 		*stat = 1;
 	else
 		*stat = 0;
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -2169,10 +2135,8 @@ __xfs_btree_updkeys(
 		trace_xfs_btree_updkeys(cur, level, bp);
 #ifdef DEBUG
 		error = xfs_btree_check_block(cur, block, level, bp);
-		if (error) {
-			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		if (error)
 			return error;
-		}
 #endif
 		ptr = cur->bc_ptrs[level];
 		nlkey = xfs_btree_key_addr(cur, ptr, block);
@@ -2224,9 +2188,6 @@ xfs_btree_update_keys(
 	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
 		return __xfs_btree_updkeys(cur, level, block, bp, false);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
-
 	/*
 	 * Go up the tree from this level toward the root.
 	 * At each level, update the key value to the value input.
@@ -2241,10 +2202,8 @@ xfs_btree_update_keys(
 		block = xfs_btree_get_block(cur, level, &bp);
 #ifdef DEBUG
 		error = xfs_btree_check_block(cur, block, level, bp);
-		if (error) {
-			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		if (error)
 			return error;
-		}
 #endif
 		ptr = cur->bc_ptrs[level];
 		kp = xfs_btree_key_addr(cur, ptr, block);
@@ -2252,7 +2211,6 @@ xfs_btree_update_keys(
 		xfs_btree_log_keys(cur, bp, ptr, ptr);
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 }
 
@@ -2272,9 +2230,6 @@ xfs_btree_update(
 	int			ptr;
 	union xfs_btree_rec	*rp;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGR(cur, rec);
-
 	/* Pick up the current block. */
 	block = xfs_btree_get_block(cur, 0, &bp);
 
@@ -2307,11 +2262,9 @@ xfs_btree_update(
 			goto error0;
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -2339,9 +2292,6 @@ xfs_btree_lshift(
 	int			error;		/* error return value */
 	int			i;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, level);
-
 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 	    level == cur->bc_nlevels - 1)
 		goto out0;
@@ -2500,21 +2450,17 @@ xfs_btree_lshift(
 	/* Slide the cursor value left one. */
 	cur->bc_ptrs[level]--;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 
 error1:
-	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
 	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 	return error;
 }
@@ -2541,9 +2487,6 @@ xfs_btree_rshift(
 	int			error;		/* error return value */
 	int			i;		/* loop counter */
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, level);
-
 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 	    (level == cur->bc_nlevels - 1))
 		goto out0;
@@ -2676,21 +2619,17 @@ xfs_btree_rshift(
 
 	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 
 error1:
-	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
 	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 	return error;
 }
@@ -2726,9 +2665,6 @@ __xfs_btree_split(
 	int			i;
 #endif
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
-
 	XFS_BTREE_STATS_INC(cur, split);
 
 	/* Set up left block (current one). */
@@ -2878,16 +2814,13 @@ __xfs_btree_split(
 		(*curp)->bc_ptrs[level + 1]++;
 	}
 	*ptrp = rptr;
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -2994,7 +2927,6 @@ xfs_btree_new_iroot(
 	int			i;		/* loop counter */
 #endif
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
 	XFS_BTREE_STATS_INC(cur, newroot);
 
 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
@@ -3008,10 +2940,9 @@ xfs_btree_new_iroot(
 	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
 	if (error)
 		goto error0;
-	if (*stat == 0) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+	if (*stat == 0)
 		return 0;
-	}
+
 	XFS_BTREE_STATS_INC(cur, alloc);
 
 	/* Copy the root into a real block. */
@@ -3074,10 +3005,8 @@ xfs_btree_new_iroot(
 	*logflags |=
 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
 	*stat = 1;
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -3102,7 +3031,6 @@ xfs_btree_new_root(
 	union xfs_btree_ptr	rptr;
 	union xfs_btree_ptr	lptr;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
 	XFS_BTREE_STATS_INC(cur, newroot);
 
 	/* initialise our start point from the cursor */
@@ -3202,14 +3130,11 @@ xfs_btree_new_root(
 	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
 	cur->bc_ptrs[cur->bc_nlevels] = nptr;
 	cur->bc_nlevels++;
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 0;
 	return 0;
 }
@@ -3230,7 +3155,7 @@ xfs_btree_make_block_unfull(
 
 	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
 	    level == cur->bc_nlevels - 1) {
-	    	struct xfs_inode *ip = cur->bc_private.b.ip;
+		struct xfs_inode *ip = cur->bc_private.b.ip;
 
 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
 			/* A root block that can be made bigger. */
@@ -3309,9 +3234,6 @@ xfs_btree_insrec(
 #endif
 	xfs_daddr_t		old_bn;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
-
 	ncur = NULL;
 	lkey = &nkey;
 
@@ -3324,14 +3246,12 @@ xfs_btree_insrec(
 		error = xfs_btree_new_root(cur, stat);
 		xfs_btree_set_ptr_null(cur, ptrp);
 
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		return error;
 	}
 
 	/* If we're off the left edge, return failure. */
 	ptr = cur->bc_ptrs[level];
 	if (ptr == 0) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -3489,12 +3409,10 @@ xfs_btree_insrec(
 		*curp = ncur;
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -3572,11 +3490,9 @@ xfs_btree_insert(
 		}
 	} while (!xfs_btree_ptr_is_null(cur, &nptr));
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = i;
 	return 0;
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
@@ -3611,8 +3527,6 @@ xfs_btree_kill_iroot(
 	int			i;
 #endif
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
 	ASSERT(cur->bc_nlevels > 1);
 
@@ -3670,19 +3584,15 @@ xfs_btree_kill_iroot(
 #ifdef DEBUG
 	for (i = 0; i < numrecs; i++) {
 		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
-		if (error) {
-			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		if (error)
 			return error;
-		}
 	}
 #endif
 	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
 
 	error = xfs_btree_free_block(cur, cbp);
-	if (error) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+	if (error)
 		return error;
-	}
 
 	cur->bc_bufs[level - 1] = NULL;
 	be16_add_cpu(&block->bb_level, -1);
@@ -3690,7 +3600,6 @@ xfs_btree_kill_iroot(
 		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
 	cur->bc_nlevels--;
 out0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 }
 
@@ -3706,7 +3615,6 @@ xfs_btree_kill_root(
 {
 	int			error;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
 	XFS_BTREE_STATS_INC(cur, killroot);
 
 	/*
@@ -3716,16 +3624,13 @@ xfs_btree_kill_root(
 	cur->bc_ops->set_root(cur, newroot, -1);
 
 	error = xfs_btree_free_block(cur, bp);
-	if (error) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+	if (error)
 		return error;
-	}
 
 	cur->bc_bufs[level] = NULL;
 	cur->bc_ra[level] = 0;
 	cur->bc_nlevels--;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	return 0;
 }
 
@@ -3744,7 +3649,6 @@ xfs_btree_dec_cursor(
 			return error;
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 }
@@ -3780,15 +3684,11 @@ xfs_btree_delrec(
 	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
 	int			numrecs;	/* temporary numrec count */
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-	XFS_BTREE_TRACE_ARGI(cur, level);
-
 	tcur = NULL;
 
 	/* Get the index of the entry being deleted, check for nothing there. */
 	ptr = cur->bc_ptrs[level];
 	if (ptr == 0) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -3805,7 +3705,6 @@ xfs_btree_delrec(
 
 	/* Fail if we're off the end of the block. */
 	if (ptr > numrecs) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -4080,7 +3979,7 @@ xfs_btree_delrec(
 				tcur = NULL;
 				if (level == 0)
 					cur->bc_ptrs[0]++;
-				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+
 				*stat = 1;
 				return 0;
 			}
@@ -4250,13 +4149,11 @@ xfs_btree_delrec(
 	 * call updkeys directly.
 	 */
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	/* Return value means the next level up has something to do. */
 	*stat = 2;
 	return 0;
 
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	if (tcur)
 		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 	return error;
@@ -4277,8 +4174,6 @@ xfs_btree_delete(
 	int			i;
 	bool			joined = false;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	/*
 	 * Go up the tree, starting at leaf level.
 	 *
@@ -4314,11 +4209,9 @@ xfs_btree_delete(
 		}
 	}
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = i;
 	return 0;
 error0:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 50440b5618e8..58e30c0975c3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -473,25 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
 #define	XFS_FILBLKS_MIN(a,b)	min_t(xfs_filblks_t, (a), (b))
 #define	XFS_FILBLKS_MAX(a,b)	max_t(xfs_filblks_t, (a), (b))
 
-/*
- * Trace hooks.  Currently not implemented as they need to be ported
- * over to the generic tracing functionality, which is some effort.
- *
- * i,j = integer (32 bit)
- * b = btree block buffer (xfs_buf_t)
- * p = btree ptr
- * r = btree record
- * k = btree key
- */
-#define	XFS_BTREE_TRACE_ARGBI(c, b, i)
-#define	XFS_BTREE_TRACE_ARGBII(c, b, i, j)
-#define	XFS_BTREE_TRACE_ARGI(c, i)
-#define	XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
-#define	XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
-#define	XFS_BTREE_TRACE_ARGIK(c, i, k)
-#define XFS_BTREE_TRACE_ARGR(c, r)
-#define	XFS_BTREE_TRACE_CURSOR(c, t)
-
 xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
 xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
 		unsigned int max_recs);
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 388d67c5c903..989e95a53db2 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -173,7 +173,7 @@ extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
 extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
 		struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
 		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
+extern int xfs_dir2_data_use_free(struct xfs_da_args *args,
 		struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
 		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
 		int *needlogp, int *needscanp);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 2da86a394bcf..875893ded514 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -451,15 +451,19 @@ xfs_dir2_block_addname(
 	 * No stale entries, will use enddup space to hold new leaf.
 	 */
 	if (!btp->stale) {
+		xfs_dir2_data_aoff_t	aoff;
+
 		/*
 		 * Mark the space needed for the new leaf entry, now in use.
 		 */
-		xfs_dir2_data_use_free(args, bp, enddup,
-			(xfs_dir2_data_aoff_t)
-			((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
-			 sizeof(*blp)),
-			(xfs_dir2_data_aoff_t)sizeof(*blp),
-			&needlog, &needscan);
+		aoff = (xfs_dir2_data_aoff_t)((char *)enddup - (char *)hdr +
+				be16_to_cpu(enddup->length) - sizeof(*blp));
+		error = xfs_dir2_data_use_free(args, bp, enddup, aoff,
+				(xfs_dir2_data_aoff_t)sizeof(*blp), &needlog,
+				&needscan);
+		if (error)
+			return error;
+
 		/*
 		 * Update the tail (entry count).
 		 */
@@ -541,9 +545,11 @@ xfs_dir2_block_addname(
 	/*
 	 * Mark space for the data entry used.
 	 */
-	xfs_dir2_data_use_free(args, bp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
-		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+	error = xfs_dir2_data_use_free(args, bp, dup,
+			(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+			(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+	if (error)
+		return error;
 	/*
 	 * Create the new data entry.
 	 */
@@ -997,8 +1003,10 @@ xfs_dir2_leaf_to_block(
 	/*
 	 * Use up the space at the end of the block (blp/btp).
 	 */
-	xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
-		&needlog, &needscan);
+	error = xfs_dir2_data_use_free(args, dbp, dup,
+			args->geo->blksize - size, size, &needlog, &needscan);
+	if (error)
+		return error;
 	/*
 	 * Initialize the block tail.
 	 */
@@ -1110,18 +1118,14 @@ xfs_dir2_sf_to_block(
 	 * Add block 0 to the inode.
 	 */
 	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
-	if (error) {
-		kmem_free(sfp);
-		return error;
-	}
+	if (error)
+		goto out_free;
 	/*
 	 * Initialize the data block, then convert it to block format.
 	 */
 	error = xfs_dir3_data_init(args, blkno, &bp);
-	if (error) {
-		kmem_free(sfp);
-		return error;
-	}
+	if (error)
+		goto out_free;
 	xfs_dir3_block_init(mp, tp, bp, dp);
 	hdr = bp->b_addr;
 
@@ -1136,8 +1140,10 @@ xfs_dir2_sf_to_block(
 	 */
 	dup = dp->d_ops->data_unused_p(hdr);
 	needlog = needscan = 0;
-	xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
-			       i, &needlog, &needscan);
+	error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
+			i, &needlog, &needscan);
+	if (error)
+		goto out_free;
 	ASSERT(needscan == 0);
 	/*
 	 * Fill in the tail.
@@ -1150,9 +1156,11 @@ xfs_dir2_sf_to_block(
 	/*
 	 * Remove the freespace, we'll manage it.
 	 */
-	xfs_dir2_data_use_free(args, bp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
-		be16_to_cpu(dup->length), &needlog, &needscan);
+	error = xfs_dir2_data_use_free(args, bp, dup,
+			(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+			be16_to_cpu(dup->length), &needlog, &needscan);
+	if (error)
+		goto out_free;
 	/*
 	 * Create entry for .
 	 */
@@ -1256,4 +1264,7 @@ xfs_dir2_sf_to_block(
 	xfs_dir2_block_log_tail(tp, bp);
 	xfs_dir3_data_check(dp, bp);
 	return 0;
+out_free:
+	kmem_free(sfp);
+	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 920279485275..cb67ec730b9b 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -932,10 +932,51 @@ xfs_dir2_data_make_free(
 	*needscanp = needscan;
 }
 
+/* Check our free data for obvious signs of corruption. */
+static inline xfs_failaddr_t
+xfs_dir2_data_check_free(
+	struct xfs_dir2_data_hdr	*hdr,
+	struct xfs_dir2_data_unused	*dup,
+	xfs_dir2_data_aoff_t		offset,
+	xfs_dir2_data_aoff_t		len)
+{
+	if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) &&
+	    hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) &&
+	    hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) &&
+	    hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
+		return __this_address;
+	if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG)
+		return __this_address;
+	if (offset < (char *)dup - (char *)hdr)
+		return __this_address;
+	if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr)
+		return __this_address;
+	if ((char *)dup - (char *)hdr !=
+			be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)))
+		return __this_address;
+	return NULL;
+}
+
+/* Sanity-check a new bestfree entry. */
+static inline xfs_failaddr_t
+xfs_dir2_data_check_new_free(
+	struct xfs_dir2_data_hdr	*hdr,
+	struct xfs_dir2_data_free	*dfp,
+	struct xfs_dir2_data_unused	*newdup)
+{
+	if (dfp == NULL)
+		return __this_address;
+	if (dfp->length != newdup->length)
+		return __this_address;
+	if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr)
+		return __this_address;
+	return NULL;
+}
+
 /*
  * Take a byte range out of an existing unused space and make it un-free.
  */
-void
+int
 xfs_dir2_data_use_free(
 	struct xfs_da_args	*args,
 	struct xfs_buf		*bp,
@@ -947,23 +988,19 @@ xfs_dir2_data_use_free(
 {
 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
+	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
+	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
+	struct xfs_dir2_data_free *bf;
+	xfs_failaddr_t		fa;
 	int			matchback;	/* matches end of freespace */
 	int			matchfront;	/* matches start of freespace */
 	int			needscan;	/* need to regen bestfree */
-	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
-	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
 	int			oldlen;		/* old unused entry's length */
-	struct xfs_dir2_data_free *bf;
 
 	hdr = bp->b_addr;
-	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
-	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
-	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
-	ASSERT(offset >= (char *)dup - (char *)hdr);
-	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
-	ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+	fa = xfs_dir2_data_check_free(hdr, dup, offset, len);
+	if (fa)
+		goto corrupt;
 	/*
 	 * Look up the entry in the bestfree table.
 	 */
@@ -1008,9 +1045,9 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
 						       needlogp);
-			ASSERT(dfp != NULL);
-			ASSERT(dfp->length == newdup->length);
-			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+			fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
+			if (fa)
+				goto corrupt;
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
@@ -1036,9 +1073,9 @@ xfs_dir2_data_use_free(
 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
 			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
 						       needlogp);
-			ASSERT(dfp != NULL);
-			ASSERT(dfp->length == newdup->length);
-			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+			fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
+			if (fa)
+				goto corrupt;
 			/*
 			 * If we got inserted at the last slot,
 			 * that means we don't know if there was a better
@@ -1084,6 +1121,11 @@ xfs_dir2_data_use_free(
 		}
 	}
 	*needscanp = needscan;
+	return 0;
+corrupt:
+	xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
+			hdr, __FILE__, __LINE__, fa);
+	return -EFSCORRUPTED;
 }
 
 /* Find the end of the entry data in a data/block format dir block. */
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d7e630f41f9c..50fc9c0c5e2b 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -877,9 +877,13 @@ xfs_dir2_leaf_addname(
 	/*
 	 * Mark the initial part of our freespace in use for the new entry.
 	 */
-	xfs_dir2_data_use_free(args, dbp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
-		&needlog, &needscan);
+	error = xfs_dir2_data_use_free(args, dbp, dup,
+			(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+			length, &needlog, &needscan);
+	if (error) {
+		xfs_trans_brelse(tp, lbp);
+		return error;
+	}
 	/*
 	 * Initialize our new entry (at last).
 	 */
@@ -1415,7 +1419,8 @@ xfs_dir2_leaf_removename(
 	oldbest = be16_to_cpu(bf[0].length);
 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
-	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
+	if (be16_to_cpu(bestsp[db]) != oldbest)
+		return -EFSCORRUPTED;
 	/*
 	 * Mark the former data entry unused.
 	 */
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 239d97a64296..9df096cc3c37 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -387,8 +387,9 @@ xfs_dir2_leaf_to_node(
 	dp->d_ops->free_hdr_from_disk(&freehdr, free);
 	leaf = lbp->b_addr;
 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
-	ASSERT(be32_to_cpu(ltp->bestcount) <=
-				(uint)dp->i_d.di_size / args->geo->blksize);
+	if (be32_to_cpu(ltp->bestcount) >
+				(uint)dp->i_d.di_size / args->geo->blksize)
+		return -EFSCORRUPTED;
 
 	/*
 	 * Copy freespace entries from the leaf block to the new block.
@@ -1728,6 +1729,7 @@ xfs_dir2_node_addname_int(
 	__be16			*bests;
 	struct xfs_dir3_icfree_hdr freehdr;
 	struct xfs_dir2_data_free *bf;
+	xfs_dir2_data_aoff_t	aoff;
 
 	dp = args->dp;
 	mp = dp->i_mount;
@@ -2022,9 +2024,13 @@ xfs_dir2_node_addname_int(
 	/*
 	 * Mark the first part of the unused space, inuse for us.
 	 */
-	xfs_dir2_data_use_free(args, dbp, dup,
-		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
-		&needlog, &needscan);
+	aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
+	error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length,
+			&needlog, &needscan);
+	if (error) {
+		xfs_trans_brelse(tp, dbp);
+		return error;
+	}
 	/*
 	 * Fill in the new entry and log it.
 	 */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1acb584fc5f7..42956d8d95ed 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -803,24 +803,13 @@ typedef struct xfs_agi {
 		&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
 		(__be32 *)(bp)->b_addr)
 
-/*
- * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of
- * slots in the beginning of the block for a proper header with the
- * location information and CRC.
- */
-#define XFS_AGFL_SIZE(mp) \
-	(((mp)->m_sb.sb_sectsize - \
-	 (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
-		sizeof(struct xfs_agfl) : 0)) / \
-	  sizeof(xfs_agblock_t))
-
 typedef struct xfs_agfl {
 	__be32		agfl_magicnum;
 	__be32		agfl_seqno;
 	uuid_t		agfl_uuid;
 	__be64		agfl_lsn;
 	__be32		agfl_crc;
-	__be32		agfl_bno[];	/* actually XFS_AGFL_SIZE(mp) */
+	__be32		agfl_bno[];	/* actually xfs_agfl_size(mp) */
 } __attribute__((packed)) xfs_agfl_t;
 
 #define XFS_AGFL_CRC_OFF	offsetof(struct xfs_agfl, agfl_crc)
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index af197a5f3a82..a2dd7f4a2719 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -93,8 +93,6 @@ __xfs_inobt_alloc_block(
 	int			error;		/* error return value */
 	xfs_agblock_t		sbno = be32_to_cpu(start->s);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	memset(&args, 0, sizeof(args));
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
@@ -107,17 +105,14 @@ __xfs_inobt_alloc_block(
 	args.resv = resv;
 
 	error = xfs_alloc_vextent(&args);
-	if (error) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+	if (error)
 		return error;
-	}
+
 	if (args.fsbno == NULLFSBLOCK) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
 	ASSERT(args.len == 1);
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 
 	new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
 	*stat = 1;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 4fe17b368316..ef68b1de006a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -93,20 +93,26 @@ xfs_inode_buf_verify(
 	bool		readahead)
 {
 	struct xfs_mount *mp = bp->b_target->bt_mount;
+	xfs_agnumber_t	agno;
 	int		i;
 	int		ni;
 
 	/*
 	 * Validate the magic number and version of every inode in the buffer
 	 */
+	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 	for (i = 0; i < ni; i++) {
 		int		di_ok;
 		xfs_dinode_t	*dip;
+		xfs_agino_t	unlinked_ino;
 
 		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
+		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
 		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-			xfs_dinode_good_version(mp, dip->di_version);
+			xfs_dinode_good_version(mp, dip->di_version) &&
+			(unlinked_ino == NULLAGINO ||
+			 xfs_verify_agino(mp, agno, unlinked_ino));
 		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
 						XFS_ERRTAG_ITOBP_INOTOBP))) {
 			if (readahead) {
@@ -115,16 +121,18 @@ xfs_inode_buf_verify(
 				return;
 			}
 
-			xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
 #ifdef DEBUG
 			xfs_alert(mp,
 				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
 				(unsigned long long)bp->b_bn, i,
 				be16_to_cpu(dip->di_magic));
 #endif
+			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
+					__func__, dip, sizeof(*dip),
+					NULL);
+			return;
 		}
 	}
-	xfs_inobp_check(mp, bp);
 }
 
 
@@ -564,10 +572,7 @@ xfs_iread(
 		/* initialise the on-disk inode core */
 		memset(&ip->i_d, 0, sizeof(ip->i_d));
 		VFS_I(ip)->i_generation = prandom_u32();
-		if (xfs_sb_version_hascrc(&mp->m_sb))
-			ip->i_d.di_version = 3;
-		else
-			ip->i_d.di_version = 2;
+		ip->i_d.di_version = 3;
 		return 0;
 	}
 
@@ -649,3 +654,108 @@ xfs_iread(
 	xfs_trans_brelse(tp, bp);
 	return error;
 }
+
+/*
+ * Validate di_extsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_extsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_extsize(
+	struct xfs_mount		*mp,
+	uint32_t			extsize,
+	uint16_t			mode,
+	uint16_t			flags)
+{
+	bool				rt_flag;
+	bool				hint_flag;
+	bool				inherit_flag;
+	uint32_t			extsize_bytes;
+	uint32_t			blocksize_bytes;
+
+	rt_flag = (flags & XFS_DIFLAG_REALTIME);
+	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
+	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
+	extsize_bytes = XFS_FSB_TO_B(mp, extsize);
+
+	if (rt_flag)
+		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+	else
+		blocksize_bytes = mp->m_sb.sb_blocksize;
+
+	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
+		return __this_address;
+
+	if (hint_flag && !S_ISREG(mode))
+		return __this_address;
+
+	if (inherit_flag && !S_ISDIR(mode))
+		return __this_address;
+
+	if ((hint_flag || inherit_flag) && extsize == 0)
+		return __this_address;
+
+	if (!(hint_flag || inherit_flag) && extsize != 0)
+		return __this_address;
+
+	if (extsize_bytes % blocksize_bytes)
+		return __this_address;
+
+	if (extsize > MAXEXTLEN)
+		return __this_address;
+
+	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
+		return __this_address;
+
+	return NULL;
+}
+
+/*
+ * Validate di_cowextsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_cowextsize(
+	struct xfs_mount		*mp,
+	uint32_t			cowextsize,
+	uint16_t			mode,
+	uint16_t			flags,
+	uint64_t			flags2)
+{
+	bool				rt_flag;
+	bool				hint_flag;
+	uint32_t			cowextsize_bytes;
+
+	rt_flag = (flags & XFS_DIFLAG_REALTIME);
+	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
+	cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
+
+	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
+		return __this_address;
+
+	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
+		return __this_address;
+
+	if (hint_flag && cowextsize == 0)
+		return __this_address;
+
+	if (!hint_flag && cowextsize != 0)
+		return __this_address;
+
+	if (hint_flag && rt_flag)
+		return __this_address;
+
+	if (cowextsize_bytes % mp->m_sb.sb_blocksize)
+		return __this_address;
+
+	if (cowextsize > MAXEXTLEN)
+		return __this_address;
+
+	if (cowextsize > mp->m_sb.sb_agblocks / 2)
+		return __this_address;
+
+	return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 8a5e1da52d74..d9a376a78ee2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -84,5 +84,10 @@ void	xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 
 xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
 			   struct xfs_dinode *dip);
+xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
+		uint32_t extsize, uint16_t mode, uint16_t flags);
+xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
+		uint32_t cowextsize, uint16_t mode, uint16_t flags,
+		uint64_t flags2);
 
 #endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 866d2861c625..701c42a28d05 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -195,8 +195,9 @@ xfs_iformat_local(
 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
 			(unsigned long long) ip->i_ino, size,
 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
-		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
-				     ip->i_mount, dip);
+		xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+				"xfs_iformat_local", dip, sizeof(*dip),
+				__this_address);
 		return -EFSCORRUPTED;
 	}
 
@@ -231,8 +232,9 @@ xfs_iformat_extents(
 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
 			(unsigned long long) ip->i_ino, nex);
-		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
-				     mp, dip);
+		xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+				"xfs_iformat_extents(1)", dip, sizeof(*dip),
+				__this_address);
 		return -EFSCORRUPTED;
 	}
 
@@ -245,10 +247,14 @@ xfs_iformat_extents(
 
 		xfs_iext_first(ifp, &icur);
 		for (i = 0; i < nex; i++, dp++) {
+			xfs_failaddr_t	fa;
+
 			xfs_bmbt_disk_get_all(dp, &new);
-			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
-				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
-						 XFS_ERRLEVEL_LOW, mp);
+			fa = xfs_bmap_validate_extent(ip, whichfork, &new);
+			if (fa) {
+				xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+						"xfs_iformat_extents(2)",
+						dp, sizeof(*dp), fa);
 				return -EFSCORRUPTED;
 			}
 
@@ -305,8 +311,9 @@ xfs_iformat_btree(
 		     level == 0 || level > XFS_BTREE_MAXLEVELS) {
 		xfs_warn(mp, "corrupt inode %Lu (btree).",
 					(unsigned long long) ip->i_ino);
-		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
-					 mp, dip);
+		xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+				"xfs_iformat_btree", dfp, size,
+				__this_address);
 		return -EFSCORRUPTED;
 	}
 
@@ -595,7 +602,7 @@ xfs_iextents_copy(
 	for_each_xfs_iext(ifp, &icur, &rec) {
 		if (isnullstartblock(rec.br_startblock))
 			continue;
-		ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
+		ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL);
 		xfs_bmbt_disk_set_all(dp, &rec);
 		trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
 		copied += sizeof(struct xfs_bmbt_rec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 8479769e470d..265fdcefcbae 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -79,8 +79,6 @@ xfs_refcountbt_alloc_block(
 	struct xfs_alloc_arg	args;		/* block allocation args */
 	int			error;		/* error return value */
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	memset(&args, 0, sizeof(args));
 	args.tp = cur->bc_tp;
 	args.mp = cur->bc_mp;
@@ -98,7 +96,6 @@ xfs_refcountbt_alloc_block(
 	trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
 			args.agbno, 1);
 	if (args.fsbno == NULLFSBLOCK) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -109,12 +106,10 @@ xfs_refcountbt_alloc_block(
 	be32_add_cpu(&agf->agf_refcount_blocks, 1);
 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
 	return 0;
 
 out_error:
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
 	return error;
 }
 
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index e829c3e489ea..8b0d0de1cd11 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -104,20 +104,15 @@ xfs_rmapbt_alloc_block(
 	int			error;
 	xfs_agblock_t		bno;
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
 	/* Allocate the new block from the freelist. If we can't, give up.  */
 	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
 				       &bno, 1);
-	if (error) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+	if (error)
 		return error;
-	}
 
 	trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
 			bno, 1);
 	if (bno == NULLAGBLOCK) {
-		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 		*stat = 0;
 		return 0;
 	}
@@ -130,7 +125,8 @@ xfs_rmapbt_alloc_block(
 	be32_add_cpu(&agf->agf_rmap_blocks, 1);
 	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 
-	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno);
+
 	*stat = 1;
 	return 0;
 }
@@ -158,6 +154,8 @@ xfs_rmapbt_free_block(
 			      XFS_EXTENT_BUSY_SKIP_DISCARD);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
 
+	xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno);
+
 	return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a55f7a45fa78..53433cc024fd 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -731,7 +731,6 @@ xfs_sb_mount_common(
 	struct xfs_sb	*sbp)
 {
 	mp->m_agfrotor = mp->m_agirotor = 0;
-	spin_lock_init(&mp->m_agirotor_lock);
 	mp->m_maxagi = mp->m_sb.sb_agcount;
 	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
 	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 05c66e05ae20..018aabbd9394 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -80,7 +80,7 @@ xfs_scrub_walk_agfl(
 	}
 
 	/* first to the end */
-	for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+	for (i = flfirst; i < xfs_agfl_size(mp); i++) {
 		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
 		if (error)
 			return error;
@@ -664,7 +664,7 @@ xfs_scrub_agf(
 	if (agfl_last > agfl_first)
 		fl_count = agfl_last - agfl_first + 1;
 	else
-		fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+		fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1;
 	if (agfl_count != 0 && fl_count != agfl_count)
 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
 
@@ -791,7 +791,7 @@ xfs_scrub_agfl(
 	/* Allocate buffer to ensure uniqueness of AGFL entries. */
 	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
 	agflcount = be32_to_cpu(agf->agf_flcount);
-	if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
+	if (agflcount > xfs_agfl_size(sc->mp)) {
 		xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out;
 	}
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 4ed80474f545..127575f0abfb 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -98,7 +98,7 @@ xfs_scrub_xattr_listent(
 
 	if (flags & XFS_ATTR_INCOMPLETE) {
 		/* Incomplete attr key, just mark the inode for preening. */
-		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
+		xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino);
 		return;
 	}
 
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index d00282130492..639d14b51e90 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -37,6 +37,7 @@
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
 #include "xfs_refcount.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
@@ -423,6 +424,169 @@ xfs_scrub_bmap_btree(
 	return error;
 }
 
+struct xfs_scrub_bmap_check_rmap_info {
+	struct xfs_scrub_context	*sc;
+	int				whichfork;
+	struct xfs_iext_cursor		icur;
+};
+
+/* Can we find bmaps that fit this rmap? */
+STATIC int
+xfs_scrub_bmap_check_rmap(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_check_rmap_info	*sbcri = priv;
+	struct xfs_ifork		*ifp;
+	struct xfs_scrub_context	*sc = sbcri->sc;
+	bool				have_map;
+
+	/* Is this even the right fork? */
+	if (rec->rm_owner != sc->ip->i_ino)
+		return 0;
+	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
+	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+		return 0;
+	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+		return 0;
+
+	/* Now look up the bmbt record. */
+	ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
+	if (!ifp) {
+		xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+				rec->rm_offset);
+		goto out;
+	}
+	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
+			&sbcri->icur, &irec);
+	if (!have_map)
+		xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+				rec->rm_offset);
+	/*
+	 * bmap extent record lengths are constrained to 2^21 blocks in length
+	 * because of space constraints in the on-disk metadata structure.
+	 * However, rmap extent record lengths are constrained only by AG
+	 * length, so we have to loop through the bmbt to make sure that the
+	 * entire rmap is covered by bmbt records.
+	 */
+	while (have_map) {
+		if (irec.br_startoff != rec->rm_offset)
+			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+					rec->rm_offset);
+		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
+				cur->bc_private.a.agno, rec->rm_startblock))
+			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+					rec->rm_offset);
+		if (irec.br_blockcount > rec->rm_blockcount)
+			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+					rec->rm_offset);
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			break;
+		rec->rm_startblock += irec.br_blockcount;
+		rec->rm_offset += irec.br_blockcount;
+		rec->rm_blockcount -= irec.br_blockcount;
+		if (rec->rm_blockcount == 0)
+			break;
+		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
+		if (!have_map)
+			xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+					rec->rm_offset);
+	}
+
+out:
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return XFS_BTREE_QUERY_RANGE_ABORT;
+	return 0;
+}
+
+/* Make sure each rmap has a corresponding bmbt entry. */
+STATIC int
+xfs_scrub_bmap_check_ag_rmaps(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_agnumber_t			agno)
+{
+	struct xfs_scrub_bmap_check_rmap_info	sbcri;
+	struct xfs_btree_cur		*cur;
+	struct xfs_buf			*agf;
+	int				error;
+
+	error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
+	if (error)
+		return error;
+
+	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
+	if (!cur) {
+		error = -ENOMEM;
+		goto out_agf;
+	}
+
+	sbcri.sc = sc;
+	sbcri.whichfork = whichfork;
+	error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri);
+	if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+		error = 0;
+
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+out_agf:
+	xfs_trans_brelse(sc->tp, agf);
+	return error;
+}
+
+/* Make sure each rmap has a corresponding bmbt entry. */
+STATIC int
+xfs_scrub_bmap_check_rmaps(
+	struct xfs_scrub_context	*sc,
+	int				whichfork)
+{
+	loff_t				size;
+	xfs_agnumber_t			agno;
+	int				error;
+
+	if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
+	    whichfork == XFS_COW_FORK ||
+	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		return 0;
+
+	/* Don't support realtime rmap checks yet. */
+	if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
+		return 0;
+
+	/*
+	 * Only do this for complex maps that are in btree format, or for
+	 * situations where we would seem to have a size but zero extents.
+	 * The inode repair code can zap broken iforks, which means we have
+	 * to flag this bmap as corrupt if there are rmaps that need to be
+	 * reattached.
+	 */
+	switch (whichfork) {
+	case XFS_DATA_FORK:
+		size = i_size_read(VFS_I(sc->ip));
+		break;
+	case XFS_ATTR_FORK:
+		size = XFS_IFORK_Q(sc->ip);
+		break;
+	default:
+		size = 0;
+		break;
+	}
+	if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+	    (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
+		return 0;
+
+	for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
+		error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno);
+		if (error)
+			return error;
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			break;
+	}
+
+	return 0;
+}
+
 /*
  * Scrub an inode fork's block mappings.
  *
@@ -457,16 +621,16 @@ xfs_scrub_bmap(
 			goto out;
 		/* No CoW forks on non-reflink inodes/filesystems. */
 		if (!xfs_is_reflink_inode(ip)) {
-			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 			goto out;
 		}
 		break;
 	case XFS_ATTR_FORK:
 		if (!ifp)
-			goto out;
+			goto out_check_rmap;
 		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
 		    !xfs_sb_version_hasattr2(&mp->m_sb))
-			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 		break;
 	default:
 		ASSERT(whichfork == XFS_DATA_FORK);
@@ -534,6 +698,10 @@ xfs_scrub_bmap(
 			goto out;
 	}
 
+out_check_rmap:
+	error = xfs_scrub_bmap_check_rmaps(sc, whichfork);
+	if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error))
+		goto out;
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 8033ab9d8f47..8ed91d5c868d 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -213,12 +213,10 @@ xfs_scrub_block_set_preen(
 void
 xfs_scrub_ino_set_preen(
 	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			*bp)
+	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
-	trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
-			__return_address);
+	trace_xfs_scrub_ino_preen(sc, ino, __return_address);
 }
 
 /* Record a corrupt block. */
@@ -249,22 +247,20 @@ xfs_scrub_block_xref_set_corrupt(
 void
 xfs_scrub_ino_set_corrupt(
 	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			*bp)
+	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
-	trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+	trace_xfs_scrub_ino_error(sc, ino, __return_address);
 }
 
 /* Record a corruption while cross-referencing with an inode. */
 void
 xfs_scrub_ino_xref_set_corrupt(
 	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			*bp)
+	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
-	trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+	trace_xfs_scrub_ino_error(sc, ino, __return_address);
 }
 
 /* Record corruption in a block indexed by a file fork. */
@@ -296,12 +292,10 @@ xfs_scrub_fblock_xref_set_corrupt(
 void
 xfs_scrub_ino_set_warning(
 	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			*bp)
+	xfs_ino_t			ino)
 {
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
-	trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
-			__return_address);
+	trace_xfs_scrub_ino_warning(sc, ino, __return_address);
 }
 
 /* Warn about a block indexed by a file fork that needs review. */
@@ -619,7 +613,7 @@ xfs_scrub_checkpoint_log(
 {
 	int			error;
 
-	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+	error = xfs_log_force(mp, XFS_LOG_SYNC);
 	if (error)
 		return error;
 	xfs_ail_push_all_sync(mp->m_ail);
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index ddb65d22c76a..deaf60400981 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -63,25 +63,22 @@ bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
 
 void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
-		struct xfs_buf *bp);
+void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
 
 void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
-		struct xfs_buf *bp);
+void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
 void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
 		xfs_fileoff_t offset);
 
 void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
 		struct xfs_buf *bp);
-void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
-		struct xfs_buf *bp);
+void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
+		xfs_ino_t ino);
 void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
 		int whichfork, xfs_fileoff_t offset);
 
-void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
-		struct xfs_buf *bp);
+void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
 void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
 		xfs_fileoff_t offset);
 
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 50b6a26b0299..38f29806eb54 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -781,7 +781,7 @@ xfs_scrub_directory(
 
 	/* Plausible size? */
 	if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 		goto out;
 	}
 
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 63ab3f98430d..106ca4bd753f 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -259,7 +259,8 @@ xfs_scrub_iallocbt_check_freemask(
 
 		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
 				&dip, &bp, 0, 0);
-		if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
+		if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0,
+				&error))
 			continue;
 
 		/* Which inodes are free? */
@@ -433,7 +434,7 @@ xfs_scrub_iallocbt_xref_rmap_inodes(
 	if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
 		return;
 	if (blocks != inode_blocks)
-		xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+		xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
 }
 
 /* Scrub the inode btrees for some AG. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 21297bef8df1..df14930e4fc5 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -89,67 +89,21 @@ out:
 
 /* Inode core */
 
-/*
- * Validate di_extsize hint.
- *
- * The rules are documented at xfs_ioctl_setattr_check_extsize().
- * These functions must be kept in sync with each other.
- */
+/* Validate di_extsize hint. */
 STATIC void
 xfs_scrub_inode_extsize(
 	struct xfs_scrub_context	*sc,
-	struct xfs_buf			*bp,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
 	uint16_t			flags)
 {
-	struct xfs_mount		*mp = sc->mp;
-	bool				rt_flag;
-	bool				hint_flag;
-	bool				inherit_flag;
-	uint32_t			extsize;
-	uint32_t			extsize_bytes;
-	uint32_t			blocksize_bytes;
-
-	rt_flag = (flags & XFS_DIFLAG_REALTIME);
-	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
-	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
-	extsize = be32_to_cpu(dip->di_extsize);
-	extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
-
-	if (rt_flag)
-		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
-	else
-		blocksize_bytes = mp->m_sb.sb_blocksize;
-
-	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
-		goto bad;
-
-	if (hint_flag && !S_ISREG(mode))
-		goto bad;
-
-	if (inherit_flag && !S_ISDIR(mode))
-		goto bad;
-
-	if ((hint_flag || inherit_flag) && extsize == 0)
-		goto bad;
-
-	if (!(hint_flag || inherit_flag) && extsize != 0)
-		goto bad;
-
-	if (extsize_bytes % blocksize_bytes)
-		goto bad;
-
-	if (extsize > MAXEXTLEN)
-		goto bad;
+	xfs_failaddr_t			fa;
 
-	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
-		goto bad;
-
-	return;
-bad:
-	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
+			mode, flags);
+	if (fa)
+		xfs_scrub_ino_set_corrupt(sc, ino);
 }
 
 /*
@@ -161,58 +115,25 @@ bad:
 STATIC void
 xfs_scrub_inode_cowextsize(
 	struct xfs_scrub_context	*sc,
-	struct xfs_buf			*bp,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
 	uint16_t			flags,
 	uint64_t			flags2)
 {
-	struct xfs_mount		*mp = sc->mp;
-	bool				rt_flag;
-	bool				hint_flag;
-	uint32_t			extsize;
-	uint32_t			extsize_bytes;
-
-	rt_flag = (flags & XFS_DIFLAG_REALTIME);
-	hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
-	extsize = be32_to_cpu(dip->di_cowextsize);
-	extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
-
-	if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
-		goto bad;
-
-	if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
-		goto bad;
-
-	if (hint_flag && extsize == 0)
-		goto bad;
-
-	if (!hint_flag && extsize != 0)
-		goto bad;
-
-	if (hint_flag && rt_flag)
-		goto bad;
-
-	if (extsize_bytes % mp->m_sb.sb_blocksize)
-		goto bad;
-
-	if (extsize > MAXEXTLEN)
-		goto bad;
-
-	if (extsize > mp->m_sb.sb_agblocks / 2)
-		goto bad;
+	xfs_failaddr_t			fa;
 
-	return;
-bad:
-	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	fa = xfs_inode_validate_cowextsize(sc->mp,
+			be32_to_cpu(dip->di_cowextsize), mode, flags,
+			flags2);
+	if (fa)
+		xfs_scrub_ino_set_corrupt(sc, ino);
 }
 
 /* Make sure the di_flags make sense for the inode. */
 STATIC void
 xfs_scrub_inode_flags(
 	struct xfs_scrub_context	*sc,
-	struct xfs_buf			*bp,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -251,14 +172,13 @@ xfs_scrub_inode_flags(
 
 	return;
 bad:
-	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	xfs_scrub_ino_set_corrupt(sc, ino);
 }
 
 /* Make sure the di_flags2 make sense for the inode. */
 STATIC void
 xfs_scrub_inode_flags2(
 	struct xfs_scrub_context	*sc,
-	struct xfs_buf			*bp,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino,
 	uint16_t			mode,
@@ -295,14 +215,13 @@ xfs_scrub_inode_flags2(
 
 	return;
 bad:
-	xfs_scrub_ino_set_corrupt(sc, ino, bp);
+	xfs_scrub_ino_set_corrupt(sc, ino);
 }
 
 /* Scrub all the ondisk inode fields. */
 STATIC void
 xfs_scrub_dinode(
 	struct xfs_scrub_context	*sc,
-	struct xfs_buf			*bp,
 	struct xfs_dinode		*dip,
 	xfs_ino_t			ino)
 {
@@ -333,7 +252,7 @@ xfs_scrub_dinode(
 		/* mode is recognized */
 		break;
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	}
 
@@ -344,22 +263,22 @@ xfs_scrub_dinode(
 		 * We autoconvert v1 inodes into v2 inodes on writeout,
 		 * so just mark this inode for preening.
 		 */
-		xfs_scrub_ino_set_preen(sc, ino, bp);
+		xfs_scrub_ino_set_preen(sc, ino);
 		break;
 	case 2:
 	case 3:
 		if (dip->di_onlink != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 
 		if (dip->di_mode == 0 && sc->ip)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 
 		if (dip->di_projid_hi != 0 &&
 		    !xfs_sb_version_hasprojid32bit(&mp->m_sb))
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 		return;
 	}
 
@@ -369,40 +288,40 @@ xfs_scrub_dinode(
 	 */
 	if (dip->di_uid == cpu_to_be32(-1U) ||
 	    dip->di_gid == cpu_to_be32(-1U))
-		xfs_scrub_ino_set_warning(sc, ino, bp);
+		xfs_scrub_ino_set_warning(sc, ino);
 
 	/* di_format */
 	switch (dip->di_format) {
 	case XFS_DINODE_FMT_DEV:
 		if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
 		    !S_ISFIFO(mode) && !S_ISSOCK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_LOCAL:
 		if (!S_ISDIR(mode) && !S_ISLNK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
 		if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (!S_ISREG(mode) && !S_ISDIR(mode))
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_UUID:
 	default:
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	}
 
 	/* di_[amc]time.nsec */
 	if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 	if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 	if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/*
 	 * di_size.  xfs_dinode_verify checks for things that screw up
@@ -411,19 +330,19 @@ xfs_scrub_dinode(
 	 */
 	isize = be64_to_cpu(dip->di_size);
 	if (isize & (1ULL << 63))
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/* Devices, fifos, and sockets must have zero size */
 	if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/* Directories can't be larger than the data section size (32G) */
 	if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/* Symlinks can't be larger than SYMLINK_MAXLEN */
 	if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/*
 	 * Warn if the running kernel can't handle the kinds of offsets
@@ -432,7 +351,7 @@ xfs_scrub_dinode(
 	 * overly large offsets, flag the inode for admin review.
 	 */
 	if (isize >= mp->m_super->s_maxbytes)
-		xfs_scrub_ino_set_warning(sc, ino, bp);
+		xfs_scrub_ino_set_warning(sc, ino);
 
 	/* di_nblocks */
 	if (flags2 & XFS_DIFLAG2_REFLINK) {
@@ -447,15 +366,15 @@ xfs_scrub_dinode(
 		 */
 		if (be64_to_cpu(dip->di_nblocks) >=
 		    mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 	} else {
 		if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 	}
 
-	xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags);
+	xfs_scrub_inode_flags(sc, dip, ino, mode, flags);
 
-	xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags);
+	xfs_scrub_inode_extsize(sc, dip, ino, mode, flags);
 
 	/* di_nextents */
 	nextents = be32_to_cpu(dip->di_nextents);
@@ -463,31 +382,31 @@ xfs_scrub_dinode(
 	switch (dip->di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		if (nextents > fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (nextents <= fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	default:
 		if (nextents != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	}
 
 	/* di_forkoff */
 	if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 	if (dip->di_anextents != 0 && dip->di_forkoff == 0)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 	if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/* di_aformat */
 	if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
 	    dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
 	    dip->di_aformat != XFS_DINODE_FMT_BTREE)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 
 	/* di_anextents */
 	nextents = be16_to_cpu(dip->di_anextents);
@@ -495,92 +414,26 @@ xfs_scrub_dinode(
 	switch (dip->di_aformat) {
 	case XFS_DINODE_FMT_EXTENTS:
 		if (nextents > fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (nextents <= fork_recs)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 		break;
 	default:
 		if (nextents != 0)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
+			xfs_scrub_ino_set_corrupt(sc, ino);
 	}
 
 	if (dip->di_version >= 3) {
 		if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
-			xfs_scrub_ino_set_corrupt(sc, ino, bp);
-		xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
-		xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
+			xfs_scrub_ino_set_corrupt(sc, ino);
+		xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2);
+		xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags,
 				flags2);
 	}
 }
 
-/* Map and read a raw inode. */
-STATIC int
-xfs_scrub_inode_map_raw(
-	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			**bpp,
-	struct xfs_dinode		**dipp)
-{
-	struct xfs_imap			imap;
-	struct xfs_mount		*mp = sc->mp;
-	struct xfs_buf			*bp = NULL;
-	struct xfs_dinode		*dip;
-	int				error;
-
-	error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
-	if (error == -EINVAL) {
-		/*
-		 * Inode could have gotten deleted out from under us;
-		 * just forget about it.
-		 */
-		error = -ENOENT;
-		goto out;
-	}
-	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
-			XFS_INO_TO_AGBNO(mp, ino), &error))
-		goto out;
-
-	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
-			imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
-			NULL);
-	if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
-			XFS_INO_TO_AGBNO(mp, ino), &error))
-		goto out;
-
-	/*
-	 * Is this really an inode?  We disabled verifiers in the above
-	 * xfs_trans_read_buf call because the inode buffer verifier
-	 * fails on /any/ inode record in the inode cluster with a bad
-	 * magic or version number, not just the one that we're
-	 * checking.  Therefore, grab the buffer unconditionally, attach
-	 * the inode verifiers by hand, and run the inode verifier only
-	 * on the one inode we want.
-	 */
-	bp->b_ops = &xfs_inode_buf_ops;
-	dip = xfs_buf_offset(bp, imap.im_boffset);
-	if (xfs_dinode_verify(mp, ino, dip) != NULL ||
-	    !xfs_dinode_good_version(mp, dip->di_version)) {
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
-		goto out_buf;
-	}
-
-	/* ...and is it the one we asked for? */
-	if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
-		error = -ENOENT;
-		goto out_buf;
-	}
-
-	*dipp = dip;
-	*bpp = bp;
-out:
-	return error;
-out_buf:
-	xfs_trans_brelse(sc->tp, bp);
-	return error;
-}
-
 /*
  * Make sure the finobt doesn't think this inode is free.
  * We don't have to check the inobt ourselves because we got the inode via
@@ -645,18 +498,18 @@ xfs_scrub_inode_xref_bmap(
 	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
 		return;
 	if (nextents < be32_to_cpu(dip->di_nextents))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 
 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
 			&nextents, &acount);
 	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
 		return;
 	if (nextents != be16_to_cpu(dip->di_anextents))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 
 	/* Check nblocks against the inode. */
 	if (count + acount != be64_to_cpu(dip->di_nblocks))
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+		xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
 }
 
 /* Cross-reference with the other btrees. */
@@ -700,8 +553,7 @@ xfs_scrub_inode_xref(
 static void
 xfs_scrub_inode_check_reflink_iflag(
 	struct xfs_scrub_context	*sc,
-	xfs_ino_t			ino,
-	struct xfs_buf			*bp)
+	xfs_ino_t			ino)
 {
 	struct xfs_mount		*mp = sc->mp;
 	bool				has_shared;
@@ -716,9 +568,9 @@ xfs_scrub_inode_check_reflink_iflag(
 			XFS_INO_TO_AGBNO(mp, ino), &error))
 		return;
 	if (xfs_is_reflink_inode(sc->ip) && !has_shared)
-		xfs_scrub_ino_set_preen(sc, ino, bp);
+		xfs_scrub_ino_set_preen(sc, ino);
 	else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
-		xfs_scrub_ino_set_corrupt(sc, ino, bp);
+		xfs_scrub_ino_set_corrupt(sc, ino);
 }
 
 /* Scrub an inode. */
@@ -727,43 +579,33 @@ xfs_scrub_inode(
 	struct xfs_scrub_context	*sc)
 {
 	struct xfs_dinode		di;
-	struct xfs_buf			*bp = NULL;
-	struct xfs_dinode		*dip;
-	xfs_ino_t			ino;
 	int				error = 0;
 
-	/* Did we get the in-core inode, or are we doing this manually? */
-	if (sc->ip) {
-		ino = sc->ip->i_ino;
-		xfs_inode_to_disk(sc->ip, &di, 0);
-		dip = &di;
-	} else {
-		/* Map & read inode. */
-		ino = sc->sm->sm_ino;
-		error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip);
-		if (error || !bp)
-			goto out;
+	/*
+	 * If sc->ip is NULL, that means that the setup function called
+	 * xfs_iget to look up the inode.  xfs_iget returned a EFSCORRUPTED
+	 * and a NULL inode, so flag the corruption error and return.
+	 */
+	if (!sc->ip) {
+		xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino);
+		return 0;
 	}
 
-	xfs_scrub_dinode(sc, bp, dip, ino);
+	/* Scrub the inode core. */
+	xfs_inode_to_disk(sc->ip, &di, 0);
+	xfs_scrub_dinode(sc, &di, sc->ip->i_ino);
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
-	/* Now let's do the things that require a live inode. */
-	if (!sc->ip)
-		goto out;
-
 	/*
 	 * Look for discrepancies between file's data blocks and the reflink
 	 * iflag.  We already checked the iflag against the file mode when
 	 * we scrubbed the dinode.
 	 */
 	if (S_ISREG(VFS_I(sc->ip)->i_mode))
-		xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
+		xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino);
 
-	xfs_scrub_inode_xref(sc, ino, dip);
+	xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di);
 out:
-	if (bp)
-		xfs_trans_brelse(sc->tp, bp);
 	return error;
 }
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 0d3851410c74..1fb88c18d455 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -167,8 +167,18 @@ xfs_scrub_parent_validate(
 	 * if the parent pointer erroneously points to a file, we
 	 * can't use DONTCACHE here because DONTCACHE inodes can trigger
 	 * immediate inactive cleanup of the inode.
+	 *
+	 * If _iget returns -EINVAL then the parent inode number is garbage
+	 * and the directory is corrupt.  If the _iget returns -EFSCORRUPTED
+	 * or -EFSBADCRC then the parent is corrupt which is a cross
+	 * referencing error.  Any other error is an operational error.
 	 */
-	error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
+	error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
+	if (error == -EINVAL) {
+		error = -EFSCORRUPTED;
+		xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
+		goto out;
+	}
 	if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
 	if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 51daa4ae2627..6ba465e6c885 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -219,7 +219,7 @@ xfs_scrub_quota(
 	/* Look for problem extents. */
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
 		goto out_unlock_inode;
 	}
 	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 26390991369a..39c41dfe08ee 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -116,8 +116,7 @@ xfs_scrub_xref_is_used_rt_space(
 	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
 		goto out_unlock;
 	if (is_free)
-		xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
-				NULL);
+		xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
 out_unlock:
 	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 }
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 4dc896852bf0..5d2b1c241be5 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -174,53 +174,32 @@ DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
 DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
 
 DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
-	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr,
-		 void *ret_ip),
-	TP_ARGS(sc, ino, daddr, ret_ip),
+	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip),
+	TP_ARGS(sc, ino, ret_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(unsigned int, type)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, bno)
 		__field(void *, ret_ip)
 	),
 	TP_fast_assign(
-		xfs_fsblock_t	fsbno;
-		xfs_agnumber_t	agno;
-		xfs_agblock_t	bno;
-
-		if (daddr) {
-			fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
-			agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
-			bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
-		} else {
-			agno = XFS_INO_TO_AGNO(sc->mp, ino);
-			bno = XFS_AGINO_TO_AGBNO(sc->mp,
-					XFS_INO_TO_AGINO(sc->mp, ino));
-		}
-
 		__entry->dev = sc->mp->m_super->s_dev;
 		__entry->ino = ino;
 		__entry->type = sc->sm->sm_type;
-		__entry->agno = agno;
-		__entry->bno = bno;
 		__entry->ret_ip = ret_ip;
 	),
-	TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
+	TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->type,
-		  __entry->agno,
-		  __entry->bno,
 		  __entry->ret_ip)
 )
 
 #define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
 DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
-		 xfs_daddr_t daddr, void *ret_ip), \
-	TP_ARGS(sc, ino, daddr, ret_ip))
+		 void *ret_ip), \
+	TP_ARGS(sc, ino, ret_ip))
 
 DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
 DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9c6a830da0ee..19eadc807056 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -209,7 +209,8 @@ xfs_setfilesize_trans_alloc(
 	struct xfs_trans	*tp;
 	int			error;
 
-	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
+				XFS_TRANS_NOFS, &tp);
 	if (error)
 		return error;
 
@@ -1330,21 +1331,20 @@ xfs_get_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				&imap, &nimaps, XFS_BMAPI_ENTIRE);
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			&nimaps, 0);
 	if (error)
 		goto out_unlock;
-
-	if (nimaps) {
-		trace_xfs_get_blocks_found(ip, offset, size,
-			imap.br_state == XFS_EXT_UNWRITTEN ?
-				XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
-		xfs_iunlock(ip, lockmode);
-	} else {
+	if (!nimaps) {
 		trace_xfs_get_blocks_notfound(ip, offset, size);
 		goto out_unlock;
 	}
 
+	trace_xfs_get_blocks_found(ip, offset, size,
+		imap.br_state == XFS_EXT_UNWRITTEN ?
+			XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
+	xfs_iunlock(ip, lockmode);
+
 	/* trim mapping down to size requested */
 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c83f549dc17b..05dee8fdd895 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1208,18 +1208,15 @@ xfs_free_file_space(
 
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
-	 * partial block at the beginning and/or end.  xfs_zero_range is
-	 * smart enough to skip any holes, including those we just created,
-	 * but we must take care not to zero beyond EOF and enlarge i_size.
+	 * partial block at the beginning and/or end.  iomap_zero_range is smart
+	 * enough to skip any holes, including those we just created, but we
+	 * must take care not to zero beyond EOF and enlarge i_size.
 	 */
-
 	if (offset >= XFS_ISIZE(ip))
 		return 0;
-
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-
-	return xfs_zero_range(ip, offset, len, NULL);
+	return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
 }
 
 /*
@@ -1899,17 +1896,28 @@ xfs_swap_extents(
 	 * performed with log redo items!
 	 */
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		int		w	= XFS_DATA_FORK;
+		uint32_t	ipnext	= XFS_IFORK_NEXTENTS(ip, w);
+		uint32_t	tipnext	= XFS_IFORK_NEXTENTS(tip, w);
+
+		/*
+		 * Conceptually this shouldn't affect the shape of either bmbt,
+		 * but since we atomically move extents one by one, we reserve
+		 * enough space to rebuild both trees.
+		 */
+		resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
+		resblks +=  XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
+
 		/*
-		 * Conceptually this shouldn't affect the shape of either
-		 * bmbt, but since we atomically move extents one by one,
-		 * we reserve enough space to rebuild both trees.
+		 * Handle the corner case where either inode might straddle the
+		 * btree format boundary. If so, the inode could bounce between
+		 * btree <-> extent format on unmap -> remap cycles, freeing and
+		 * allocating a bmapbt block each time.
 		 */
-		resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
-				XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
-				XFS_DATA_FORK) +
-			  XFS_SWAP_RMAP_SPACE_RES(mp,
-				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
-				XFS_DATA_FORK);
+		if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1))
+			resblks += XFS_IFORK_MAXEXT(ip, w);
+		if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1))
+			resblks += XFS_IFORK_MAXEXT(tip, w);
 	}
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 	if (error)
@@ -2003,11 +2011,11 @@ xfs_swap_extents(
 		ip->i_cowfp = tip->i_cowfp;
 		tip->i_cowfp = cowfp;
 
-		if (ip->i_cowfp && ip->i_cnextents)
+		if (ip->i_cowfp && ip->i_cowfp->if_bytes)
 			xfs_inode_set_cowblocks_tag(ip);
 		else
 			xfs_inode_clear_cowblocks_tag(ip);
-		if (tip->i_cowfp && tip->i_cnextents)
+		if (tip->i_cowfp && tip->i_cowfp->if_bytes)
 			xfs_inode_set_cowblocks_tag(tip);
 		else
 			xfs_inode_clear_cowblocks_tag(tip);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1da2ee9e6db..ac669a10c62f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1708,7 +1708,7 @@ xfs_buftarg_isolate(
 	 * zero. If the value is already zero, we need to reclaim the
 	 * buffer, otherwise it gets another trip through the LRU.
 	 */
-	if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
 		spin_unlock(&bp->b_lock);
 		return LRU_ROTATE;
 	}
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 270ddb4d2313..82ad270e390e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -460,7 +460,7 @@ xfs_buf_item_unpin(
 			list_del_init(&bp->b_li_list);
 			bp->b_iodone = NULL;
 		} else {
-			spin_lock(&ailp->xa_lock);
+			spin_lock(&ailp->ail_lock);
 			xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
 			ASSERT(bp->b_log_item == NULL);
@@ -1057,12 +1057,12 @@ xfs_buf_do_callbacks_fail(
 	lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
 			li_bio_list);
 	ailp = lip->li_ailp;
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
 		if (lip->li_ops->iop_error)
 			lip->li_ops->iop_error(lip, bp);
 	}
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 }
 
 static bool
@@ -1226,7 +1226,7 @@ xfs_buf_iodone(
 	 *
 	 * Either way, AIL is useless if we're forcing a shutdown.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
 	xfs_buf_item_free(BUF_ITEM(lip));
 }
@@ -1246,7 +1246,7 @@ xfs_buf_resubmit_failed_buffers(
 	/*
 	 * Clear XFS_LI_FAILED flag from all items before resubmit
 	 *
-	 * XFS_LI_FAILED set/clear is protected by xa_lock, caller  this
+	 * XFS_LI_FAILED set/clear is protected by ail_lock, caller  this
 	 * function already have it acquired
 	 */
 	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 43572f8a1b8e..a7daef9e16bf 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -394,8 +394,6 @@ xfs_qm_dqalloc(
 error1:
 	xfs_defer_cancel(&dfops);
 error0:
-	xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
 	return error;
 }
 
@@ -920,7 +918,7 @@ xfs_qm_dqflush_done(
 	     (lip->li_flags & XFS_LI_FAILED))) {
 
 		/* xfs_trans_ail_delete() drops the AIL lock. */
-		spin_lock(&ailp->xa_lock);
+		spin_lock(&ailp->ail_lock);
 		if (lip->li_lsn == qip->qli_flush_lsn) {
 			xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
 		} else {
@@ -930,7 +928,7 @@ xfs_qm_dqflush_done(
 			 */
 			if (lip->li_flags & XFS_LI_FAILED)
 				xfs_clear_li_failed(lip);
-			spin_unlock(&ailp->xa_lock);
+			spin_unlock(&ailp->ail_lock);
 		}
 	}
 
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 96eaa6933709..4b331e354da7 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -157,8 +157,9 @@ xfs_dquot_item_error(
 STATIC uint
 xfs_qm_dquot_logitem_push(
 	struct xfs_log_item	*lip,
-	struct list_head	*buffer_list) __releases(&lip->li_ailp->xa_lock)
-					      __acquires(&lip->li_ailp->xa_lock)
+	struct list_head	*buffer_list)
+		__releases(&lip->li_ailp->ail_lock)
+		__acquires(&lip->li_ailp->ail_lock)
 {
 	struct xfs_dquot	*dqp = DQUOT_ITEM(lip)->qli_dquot;
 	struct xfs_buf		*bp = lip->li_buf;
@@ -205,7 +206,7 @@ xfs_qm_dquot_logitem_push(
 		goto out_unlock;
 	}
 
-	spin_unlock(&lip->li_ailp->xa_lock);
+	spin_unlock(&lip->li_ailp->ail_lock);
 
 	error = xfs_qm_dqflush(dqp, &bp);
 	if (error) {
@@ -217,7 +218,7 @@ xfs_qm_dquot_logitem_push(
 		xfs_buf_relse(bp);
 	}
 
-	spin_lock(&lip->li_ailp->xa_lock);
+	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
 	xfs_dqunlock(dqp);
 	return rval;
@@ -400,7 +401,7 @@ xfs_qm_qoffend_logitem_committed(
 	 * Delete the qoff-start logitem from the AIL.
 	 * xfs_trans_ail_delete() drops the AIL lock.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
 
 	kmem_free(qfs->qql_item.li_lv_shadow);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ccf520f0b00d..a63f5083f497 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,27 +347,32 @@ xfs_corruption_error(
  * values, and omit the stack trace unless the error level is tuned high.
  */
 void
-xfs_verifier_error(
+xfs_buf_verifier_error(
 	struct xfs_buf		*bp,
 	int			error,
+	const char		*name,
+	void			*buf,
+	size_t			bufsz,
 	xfs_failaddr_t		failaddr)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 	xfs_failaddr_t		fa;
+	int			sz;
 
 	fa = failaddr ? failaddr : __return_address;
 	__xfs_buf_ioerror(bp, error, fa);
 
-	xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
+	xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx %s",
 		  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
-		  fa, bp->b_ops->name, bp->b_bn);
+		  fa, bp->b_ops->name, bp->b_bn, name);
 
 	xfs_alert(mp, "Unmount and run xfs_repair");
 
 	if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+		sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
 		xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
-				XFS_CORRUPTION_DUMP_LEN);
-		xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
+				sz);
+		xfs_hex_dump(buf, sz);
 	}
 
 	if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
@@ -375,6 +380,20 @@ xfs_verifier_error(
 }
 
 /*
+ * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid
+ * values, and omit the stack trace unless the error level is tuned high.
+ */
+void
+xfs_verifier_error(
+	struct xfs_buf		*bp,
+	int			error,
+	xfs_failaddr_t		failaddr)
+{
+	return xfs_buf_verifier_error(bp, error, "", xfs_buf_offset(bp, 0),
+			XFS_CORRUPTION_DUMP_LEN, failaddr);
+}
+
+/*
  * Warnings for inode corruption problems.  Don't bother with the stack
  * trace unless the error level is turned up high.
  */
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 7e728c5a46b8..ce391349e78b 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -26,6 +26,9 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
 extern void xfs_corruption_error(const char *tag, int level,
 			struct xfs_mount *mp, void *p, const char *filename,
 			int linenum, xfs_failaddr_t failaddr);
+extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
+			const char *name, void *buf, size_t bufsz,
+			xfs_failaddr_t failaddr);
 extern void xfs_verifier_error(struct xfs_buf *bp, int error,
 			xfs_failaddr_t failaddr);
 extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index fe1bfee35898..761f3189eff2 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
 
 	if (!lsn)
 		return 0;
-	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 
 const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 77760dbf0242..13e3d1a69e76 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -611,10 +611,9 @@ xfs_extent_busy_flush(
 	unsigned		busy_gen)
 {
 	DEFINE_WAIT		(wait);
-	int			log_flushed = 0, error;
+	int			error;
 
-	trace_xfs_log_force(mp, 0, _THIS_IP_);
-	error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
+	error = xfs_log_force(mp, XFS_LOG_SYNC);
 	if (error)
 		return;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9ea08326f876..299aee4b7b0b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -48,20 +48,6 @@
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
-/*
- * Clear the specified ranges to zero through either the pagecache or DAX.
- * Holes and unwritten extents will be left as-is as they already are zeroed.
- */
-int
-xfs_zero_range(
-	struct xfs_inode	*ip,
-	xfs_off_t		pos,
-	xfs_off_t		count,
-	bool			*did_zero)
-{
-	return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
-}
-
 int
 xfs_update_prealloc_flags(
 	struct xfs_inode	*ip,
@@ -122,7 +108,7 @@ xfs_dir_fsync(
 
 	if (!lsn)
 		return 0;
-	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 
 STATIC int
@@ -182,7 +168,7 @@ xfs_file_fsync(
 	}
 
 	if (lsn) {
-		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+		error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
 		ip->i_itemp->ili_fsync_fields = 0;
 	}
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -301,31 +287,6 @@ xfs_file_read_iter(
 }
 
 /*
- * Zero any on disk space between the current EOF and the new, larger EOF.
- *
- * This handles the normal case of zeroing the remainder of the last block in
- * the file and the unusual case of zeroing blocks out beyond the size of the
- * file.  This second case only happens with fixed size extents and when the
- * system crashes before the inode size was updated but after blocks were
- * allocated.
- *
- * Expects the iolock to be held exclusive, and will take the ilock internally.
- */
-int					/* error (positive) */
-xfs_zero_eof(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,		/* starting I/O offset */
-	xfs_fsize_t		isize,		/* current inode size */
-	bool			*did_zeroing)
-{
-	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-	ASSERT(offset > isize);
-
-	trace_xfs_zero_eof(ip, isize, offset - isize);
-	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
-}
-
-/*
  * Common pre-write limit and setup checks.
  *
  * Called with the iolocked held either shared and exclusive according to
@@ -344,6 +305,7 @@ xfs_file_aio_write_checks(
 	ssize_t			error = 0;
 	size_t			count = iov_iter_count(from);
 	bool			drained_dio = false;
+	loff_t			isize;
 
 restart:
 	error = generic_write_checks(iocb, from);
@@ -380,7 +342,8 @@ restart:
 	 * and hence be able to correctly determine if we need to run zeroing.
 	 */
 	spin_lock(&ip->i_flags_lock);
-	if (iocb->ki_pos > i_size_read(inode)) {
+	isize = i_size_read(inode);
+	if (iocb->ki_pos > isize) {
 		spin_unlock(&ip->i_flags_lock);
 		if (!drained_dio) {
 			if (*iolock == XFS_IOLOCK_SHARED) {
@@ -401,7 +364,10 @@ restart:
 			drained_dio = true;
 			goto restart;
 		}
-		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
+	
+		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
+				NULL, &xfs_iomap_ops);
 		if (error)
 			return error;
 	} else
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8b4545623e25..523792768080 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -217,7 +217,7 @@ xfs_growfs_data_private(
 		}
 
 		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
-		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
+		for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
 			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
 
 		error = xfs_bwrite(bp);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d53a316162d6..9a18f69f6e96 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -483,7 +483,28 @@ xfs_iget_cache_miss(
 
 	trace_xfs_iget_miss(ip);
 
-	if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+
+	/*
+	 * If we are allocating a new inode, then check what was returned is
+	 * actually a free, empty inode. If we are not allocating an inode,
+	 * the check we didn't find a free inode.
+	 */
+	if (flags & XFS_IGET_CREATE) {
+		if (VFS_I(ip)->i_mode != 0) {
+			xfs_warn(mp,
+"Corruption detected! Free inode 0x%llx not marked free on disk",
+				ino);
+			error = -EFSCORRUPTED;
+			goto out_destroy;
+		}
+		if (ip->i_d.di_nblocks != 0) {
+			xfs_warn(mp,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+				ino);
+			error = -EFSCORRUPTED;
+			goto out_destroy;
+		}
+	} else if (VFS_I(ip)->i_mode == 0) {
 		error = -ENOENT;
 		goto out_destroy;
 	}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 604ee384a00a..3e3aab3888fa 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1872,6 +1872,7 @@ xfs_inactive(
 	xfs_inode_t	*ip)
 {
 	struct xfs_mount	*mp;
+	struct xfs_ifork	*cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
 	int			error;
 	int			truncate = 0;
 
@@ -1892,6 +1893,10 @@ xfs_inactive(
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return;
 
+	/* Try to clean out the cow blocks if there are any. */
+	if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+		xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+
 	if (VFS_I(ip)->i_nlink != 0) {
 		/*
 		 * force is true because we are evicting an inode from the
@@ -2470,6 +2475,10 @@ xfs_ifree(
 	ip->i_d.di_forkoff = 0;		/* mark the attr fork not in use */
 	ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
 	ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+	/* Don't attempt to replay owner changes for a deleted inode */
+	ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER);
+
 	/*
 	 * Bump the generation count so no one will be confused
 	 * by reincarnations of this inode.
@@ -2497,7 +2506,7 @@ xfs_iunpin(
 	trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
 
 	/* Give the log a push to start the unpinning I/O */
-	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
+	xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
 
 }
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3e8dc990d41c..132d8aa2afc4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -443,10 +443,6 @@ enum xfs_prealloc_flags {
 
 int	xfs_update_prealloc_flags(struct xfs_inode *ip,
 				  enum xfs_prealloc_flags flags);
-int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
-		     xfs_fsize_t isize, bool *did_zeroing);
-int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
-		bool *did_zero);
 
 /* from xfs_iops.c */
 extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d5037f060d6f..34b91b789702 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -502,8 +502,8 @@ STATIC uint
 xfs_inode_item_push(
 	struct xfs_log_item	*lip,
 	struct list_head	*buffer_list)
-		__releases(&lip->li_ailp->xa_lock)
-		__acquires(&lip->li_ailp->xa_lock)
+		__releases(&lip->li_ailp->ail_lock)
+		__acquires(&lip->li_ailp->ail_lock)
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
@@ -562,7 +562,7 @@ xfs_inode_item_push(
 	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
 	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
 
-	spin_unlock(&lip->li_ailp->xa_lock);
+	spin_unlock(&lip->li_ailp->ail_lock);
 
 	error = xfs_iflush(ip, &bp);
 	if (!error) {
@@ -571,7 +571,7 @@ xfs_inode_item_push(
 		xfs_buf_relse(bp);
 	}
 
-	spin_lock(&lip->li_ailp->xa_lock);
+	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 	return rval;
@@ -579,9 +579,6 @@ out_unlock:
 
 /*
  * Unlock the inode associated with the inode log item.
- * Clear the fields of the inode and inode log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the inode.
  */
 STATIC void
 xfs_inode_item_unlock(
@@ -637,10 +634,6 @@ xfs_inode_item_committed(
 	return lsn;
 }
 
-/*
- * XXX rcc - this one really has to do something.  Probably needs
- * to stamp in a new field in the incore inode.
- */
 STATIC void
 xfs_inode_item_committing(
 	struct xfs_log_item	*lip,
@@ -759,7 +752,7 @@ xfs_iflush_done(
 		bool			mlip_changed = false;
 
 		/* this is an opencoded batch version of xfs_trans_ail_delete */
-		spin_lock(&ailp->xa_lock);
+		spin_lock(&ailp->ail_lock);
 		list_for_each_entry(blip, &tmp, li_bio_list) {
 			if (INODE_ITEM(blip)->ili_logged &&
 			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
@@ -770,15 +763,15 @@ xfs_iflush_done(
 		}
 
 		if (mlip_changed) {
-			if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-				xlog_assign_tail_lsn_locked(ailp->xa_mount);
-			if (list_empty(&ailp->xa_ail))
-				wake_up_all(&ailp->xa_empty);
+			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+				xlog_assign_tail_lsn_locked(ailp->ail_mount);
+			if (list_empty(&ailp->ail_head))
+				wake_up_all(&ailp->ail_empty);
 		}
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 
 		if (mlip_changed)
-			xfs_log_space_wake(ailp->xa_mount);
+			xfs_log_space_wake(ailp->ail_mount);
 	}
 
 	/*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 56475fcd76f2..e0307fbff911 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -46,6 +46,7 @@
 #include <linux/security.h>
 #include <linux/iomap.h>
 #include <linux/slab.h>
+#include <linux/iversion.h>
 
 /*
  * Directories have different lock order w.r.t. mmap_sem compared to regular
@@ -874,7 +875,9 @@ xfs_setattr_size(
 	 * truncate.
 	 */
 	if (newsize > oldsize) {
-		error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
+		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
+		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
+				&did_zeroing, &xfs_iomap_ops);
 	} else {
 		error = iomap_truncate_page(inode, newsize, &did_zeroing,
 				&xfs_iomap_ops);
@@ -1052,11 +1055,21 @@ xfs_vn_update_time(
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
+	int			log_flags = XFS_ILOG_TIMESTAMP;
 	struct xfs_trans	*tp;
 	int			error;
 
 	trace_xfs_update_time(ip);
 
+	if (inode->i_sb->s_flags & SB_LAZYTIME) {
+		if (!((flags & S_VERSION) &&
+		      inode_maybe_inc_iversion(inode, false)))
+			return generic_update_time(inode, now, flags);
+
+		/* Capture the iversion update that just occurred */
+		log_flags |= XFS_ILOG_CORE;
+	}
+
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
 	if (error)
 		return error;
@@ -1070,7 +1083,7 @@ xfs_vn_update_time(
 		inode->i_atime = *now;
 
 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+	xfs_trans_log_inode(tp, ip, log_flags);
 	return xfs_trans_commit(tp);
 }
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3e5ba1ecc080..b9c9c848146b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -869,7 +869,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		return 0;
 	}
 
-	error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+	error = xfs_log_force(mp, XFS_LOG_SYNC);
 	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
@@ -1149,7 +1149,7 @@ xlog_assign_tail_lsn_locked(
 	struct xfs_log_item	*lip;
 	xfs_lsn_t		tail_lsn;
 
-	assert_spin_locked(&mp->m_ail->xa_lock);
+	assert_spin_locked(&mp->m_ail->ail_lock);
 
 	/*
 	 * To make sure we always have a valid LSN for the log tail we keep
@@ -1172,9 +1172,9 @@ xlog_assign_tail_lsn(
 {
 	xfs_lsn_t		tail_lsn;
 
-	spin_lock(&mp->m_ail->xa_lock);
+	spin_lock(&mp->m_ail->ail_lock);
 	tail_lsn = xlog_assign_tail_lsn_locked(mp);
-	spin_unlock(&mp->m_ail->xa_lock);
+	spin_unlock(&mp->m_ail->ail_lock);
 
 	return tail_lsn;
 }
@@ -3304,269 +3304,215 @@ xlog_state_switch_iclogs(
  *		not in the active nor dirty state.
  */
 int
-_xfs_log_force(
+xfs_log_force(
 	struct xfs_mount	*mp,
-	uint			flags,
-	int			*log_flushed)
+	uint			flags)
 {
 	struct xlog		*log = mp->m_log;
 	struct xlog_in_core	*iclog;
 	xfs_lsn_t		lsn;
 
 	XFS_STATS_INC(mp, xs_log_force);
+	trace_xfs_log_force(mp, 0, _RET_IP_);
 
 	xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
-
 	iclog = log->l_iclog;
-	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		spin_unlock(&log->l_icloglock);
-		return -EIO;
-	}
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		goto out_error;
 
-	/* If the head iclog is not active nor dirty, we just attach
-	 * ourselves to the head and go to sleep.
-	 */
-	if (iclog->ic_state == XLOG_STATE_ACTIVE ||
-	    iclog->ic_state == XLOG_STATE_DIRTY) {
+	if (iclog->ic_state == XLOG_STATE_DIRTY ||
+	    (iclog->ic_state == XLOG_STATE_ACTIVE &&
+	     atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
 		/*
-		 * If the head is dirty or (active and empty), then
-		 * we need to look at the previous iclog.  If the previous
-		 * iclog is active or dirty we are done.  There is nothing
-		 * to sync out.  Otherwise, we attach ourselves to the
+		 * If the head is dirty or (active and empty), then we need to
+		 * look at the previous iclog.
+		 *
+		 * If the previous iclog is active or dirty we are done.  There
+		 * is nothing to sync out. Otherwise, we attach ourselves to the
 		 * previous iclog and go to sleep.
 		 */
-		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (atomic_read(&iclog->ic_refcnt) == 0
-		     && iclog->ic_offset == 0)) {
-			iclog = iclog->ic_prev;
-			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
-			    iclog->ic_state == XLOG_STATE_DIRTY)
-				goto no_sleep;
-			else
-				goto maybe_sleep;
-		} else {
-			if (atomic_read(&iclog->ic_refcnt) == 0) {
-				/* We are the only one with access to this
-				 * iclog.  Flush it out now.  There should
-				 * be a roundoff of zero to show that someone
-				 * has already taken care of the roundoff from
-				 * the previous sync.
-				 */
-				atomic_inc(&iclog->ic_refcnt);
-				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-				xlog_state_switch_iclogs(log, iclog, 0);
-				spin_unlock(&log->l_icloglock);
+		iclog = iclog->ic_prev;
+		if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+		    iclog->ic_state == XLOG_STATE_DIRTY)
+			goto out_unlock;
+	} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+		if (atomic_read(&iclog->ic_refcnt) == 0) {
+			/*
+			 * We are the only one with access to this iclog.
+			 *
+			 * Flush it out now.  There should be a roundoff of zero
+			 * to show that someone has already taken care of the
+			 * roundoff from the previous sync.
+			 */
+			atomic_inc(&iclog->ic_refcnt);
+			lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+			xlog_state_switch_iclogs(log, iclog, 0);
+			spin_unlock(&log->l_icloglock);
 
-				if (xlog_state_release_iclog(log, iclog))
-					return -EIO;
-
-				if (log_flushed)
-					*log_flushed = 1;
-				spin_lock(&log->l_icloglock);
-				if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
-				    iclog->ic_state != XLOG_STATE_DIRTY)
-					goto maybe_sleep;
-				else
-					goto no_sleep;
-			} else {
-				/* Someone else is writing to this iclog.
-				 * Use its call to flush out the data.  However,
-				 * the other thread may not force out this LR,
-				 * so we mark it WANT_SYNC.
-				 */
-				xlog_state_switch_iclogs(log, iclog, 0);
-				goto maybe_sleep;
-			}
-		}
-	}
+			if (xlog_state_release_iclog(log, iclog))
+				return -EIO;
 
-	/* By the time we come around again, the iclog could've been filled
-	 * which would give it another lsn.  If we have a new lsn, just
-	 * return because the relevant data has been flushed.
-	 */
-maybe_sleep:
-	if (flags & XFS_LOG_SYNC) {
-		/*
-		 * We must check if we're shutting down here, before
-		 * we wait, while we're holding the l_icloglock.
-		 * Then we check again after waking up, in case our
-		 * sleep was disturbed by a bad news.
-		 */
-		if (iclog->ic_state & XLOG_STATE_IOERROR) {
-			spin_unlock(&log->l_icloglock);
-			return -EIO;
+			spin_lock(&log->l_icloglock);
+			if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
+			    iclog->ic_state == XLOG_STATE_DIRTY)
+				goto out_unlock;
+		} else {
+			/*
+			 * Someone else is writing to this iclog.
+			 *
+			 * Use its call to flush out the data.  However, the
+			 * other thread may not force out this LR, so we mark
+			 * it WANT_SYNC.
+			 */
+			xlog_state_switch_iclogs(log, iclog, 0);
 		}
-		XFS_STATS_INC(mp, xs_log_force_sleep);
-		xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+	} else {
 		/*
-		 * No need to grab the log lock here since we're
-		 * only deciding whether or not to return EIO
-		 * and the memory read should be atomic.
+		 * If the head iclog is not active nor dirty, we just attach
+		 * ourselves to the head and go to sleep if necessary.
 		 */
-		if (iclog->ic_state & XLOG_STATE_IOERROR)
-			return -EIO;
-	} else {
-
-no_sleep:
-		spin_unlock(&log->l_icloglock);
+		;
 	}
+
+	if (!(flags & XFS_LOG_SYNC))
+		goto out_unlock;
+
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		goto out_error;
+	XFS_STATS_INC(mp, xs_log_force_sleep);
+	xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return -EIO;
 	return 0;
-}
 
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
-	xfs_mount_t	*mp,
-	uint		flags)
-{
-	trace_xfs_log_force(mp, 0, _RET_IP_);
-	_xfs_log_force(mp, flags, NULL);
+out_unlock:
+	spin_unlock(&log->l_icloglock);
+	return 0;
+out_error:
+	spin_unlock(&log->l_icloglock);
+	return -EIO;
 }
 
-/*
- * Force the in-core log to disk for a specific LSN.
- *
- * Find in-core log with lsn.
- *	If it is in the DIRTY state, just return.
- *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
- *		state and go to sleep or return.
- *	If it is in any other state, go to sleep or return.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log.  When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force_lsn(
+static int
+__xfs_log_force_lsn(
 	struct xfs_mount	*mp,
 	xfs_lsn_t		lsn,
 	uint			flags,
-	int			*log_flushed)
+	int			*log_flushed,
+	bool			already_slept)
 {
 	struct xlog		*log = mp->m_log;
 	struct xlog_in_core	*iclog;
-	int			already_slept = 0;
 
-	ASSERT(lsn != 0);
-
-	XFS_STATS_INC(mp, xs_log_force);
-
-	lsn = xlog_cil_force_lsn(log, lsn);
-	if (lsn == NULLCOMMITLSN)
-		return 0;
-
-try_again:
 	spin_lock(&log->l_icloglock);
 	iclog = log->l_iclog;
-	if (iclog->ic_state & XLOG_STATE_IOERROR) {
-		spin_unlock(&log->l_icloglock);
-		return -EIO;
-	}
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		goto out_error;
 
-	do {
-		if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
-			iclog = iclog->ic_next;
-			continue;
-		}
+	while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+		iclog = iclog->ic_next;
+		if (iclog == log->l_iclog)
+			goto out_unlock;
+	}
 
-		if (iclog->ic_state == XLOG_STATE_DIRTY) {
-			spin_unlock(&log->l_icloglock);
-			return 0;
-		}
+	if (iclog->ic_state == XLOG_STATE_DIRTY)
+		goto out_unlock;
 
-		if (iclog->ic_state == XLOG_STATE_ACTIVE) {
-			/*
-			 * We sleep here if we haven't already slept (e.g.
-			 * this is the first time we've looked at the correct
-			 * iclog buf) and the buffer before us is going to
-			 * be sync'ed. The reason for this is that if we
-			 * are doing sync transactions here, by waiting for
-			 * the previous I/O to complete, we can allow a few
-			 * more transactions into this iclog before we close
-			 * it down.
-			 *
-			 * Otherwise, we mark the buffer WANT_SYNC, and bump
-			 * up the refcnt so we can release the log (which
-			 * drops the ref count).  The state switch keeps new
-			 * transaction commits from using this buffer.  When
-			 * the current commits finish writing into the buffer,
-			 * the refcount will drop to zero and the buffer will
-			 * go out then.
-			 */
-			if (!already_slept &&
-			    (iclog->ic_prev->ic_state &
-			     (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
-				ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+	if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+		/*
+		 * We sleep here if we haven't already slept (e.g. this is the
+		 * first time we've looked at the correct iclog buf) and the
+		 * buffer before us is going to be sync'ed.  The reason for this
+		 * is that if we are doing sync transactions here, by waiting
+		 * for the previous I/O to complete, we can allow a few more
+		 * transactions into this iclog before we close it down.
+		 *
+		 * Otherwise, we mark the buffer WANT_SYNC, and bump up the
+		 * refcnt so we can release the log (which drops the ref count).
+		 * The state switch keeps new transaction commits from using
+		 * this buffer.  When the current commits finish writing into
+		 * the buffer, the refcount will drop to zero and the buffer
+		 * will go out then.
+		 */
+		if (!already_slept &&
+		    (iclog->ic_prev->ic_state &
+		     (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+			ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
 
-				XFS_STATS_INC(mp, xs_log_force_sleep);
+			XFS_STATS_INC(mp, xs_log_force_sleep);
 
-				xlog_wait(&iclog->ic_prev->ic_write_wait,
-							&log->l_icloglock);
-				already_slept = 1;
-				goto try_again;
-			}
-			atomic_inc(&iclog->ic_refcnt);
-			xlog_state_switch_iclogs(log, iclog, 0);
-			spin_unlock(&log->l_icloglock);
-			if (xlog_state_release_iclog(log, iclog))
-				return -EIO;
-			if (log_flushed)
-				*log_flushed = 1;
-			spin_lock(&log->l_icloglock);
+			xlog_wait(&iclog->ic_prev->ic_write_wait,
+					&log->l_icloglock);
+			return -EAGAIN;
 		}
+		atomic_inc(&iclog->ic_refcnt);
+		xlog_state_switch_iclogs(log, iclog, 0);
+		spin_unlock(&log->l_icloglock);
+		if (xlog_state_release_iclog(log, iclog))
+			return -EIO;
+		if (log_flushed)
+			*log_flushed = 1;
+		spin_lock(&log->l_icloglock);
+	}
 
-		if ((flags & XFS_LOG_SYNC) && /* sleep */
-		    !(iclog->ic_state &
-		      (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
-			/*
-			 * Don't wait on completion if we know that we've
-			 * gotten a log write error.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR) {
-				spin_unlock(&log->l_icloglock);
-				return -EIO;
-			}
-			XFS_STATS_INC(mp, xs_log_force_sleep);
-			xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
-			/*
-			 * No need to grab the log lock here since we're
-			 * only deciding whether or not to return EIO
-			 * and the memory read should be atomic.
-			 */
-			if (iclog->ic_state & XLOG_STATE_IOERROR)
-				return -EIO;
-		} else {		/* just return */
-			spin_unlock(&log->l_icloglock);
-		}
+	if (!(flags & XFS_LOG_SYNC) ||
+	    (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
+		goto out_unlock;
 
-		return 0;
-	} while (iclog != log->l_iclog);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		goto out_error;
+
+	XFS_STATS_INC(mp, xs_log_force_sleep);
+	xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return -EIO;
+	return 0;
 
+out_unlock:
 	spin_unlock(&log->l_icloglock);
 	return 0;
+out_error:
+	spin_unlock(&log->l_icloglock);
+	return -EIO;
 }
 
 /*
- * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
+ * Force the in-core log to disk for a specific LSN.
+ *
+ * Find in-core log with lsn.
+ *	If it is in the DIRTY state, just return.
+ *	If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
+ *		state and go to sleep or return.
+ *	If it is in any other state, go to sleep or return.
+ *
+ * Synchronous forces are implemented with a wait queue.  All callers trying
+ * to force a given lsn to disk must wait on the queue attached to the
+ * specific in-core log.  When given in-core log finally completes its write
+ * to disk, that thread will wake up all threads waiting on the queue.
  */
-void
+int
 xfs_log_force_lsn(
-	xfs_mount_t	*mp,
-	xfs_lsn_t	lsn,
-	uint		flags)
+	struct xfs_mount	*mp,
+	xfs_lsn_t		lsn,
+	uint			flags,
+	int			*log_flushed)
 {
+	int			ret;
+	ASSERT(lsn != 0);
+
+	XFS_STATS_INC(mp, xs_log_force);
 	trace_xfs_log_force(mp, lsn, _RET_IP_);
-	_xfs_log_force_lsn(mp, lsn, flags, NULL);
+
+	lsn = xlog_cil_force_lsn(mp->m_log, lsn);
+	if (lsn == NULLCOMMITLSN)
+		return 0;
+
+	ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
+	if (ret == -EAGAIN)
+		ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
+	return ret;
 }
 
 /*
@@ -4035,7 +3981,7 @@ xfs_log_force_umount(
 	 * to guarantee this.
 	 */
 	if (!logerror)
-		_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+		xfs_log_force(mp, XFS_LOG_SYNC);
 
 	/*
 	 * mark the filesystem and the as in a shutdown state and wake
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index bf212772595c..7e2d62922a16 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -129,18 +129,9 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
 		       struct xlog_ticket *ticket,
 		       struct xlog_in_core **iclog,
 		       bool regrant);
-int	  _xfs_log_force(struct xfs_mount *mp,
-			 uint		flags,
-			 int		*log_forced);
-void	  xfs_log_force(struct xfs_mount	*mp,
-			uint			flags);
-int	  _xfs_log_force_lsn(struct xfs_mount *mp,
-			     xfs_lsn_t		lsn,
-			     uint		flags,
-			     int		*log_forced);
-void	  xfs_log_force_lsn(struct xfs_mount	*mp,
-			    xfs_lsn_t		lsn,
-			    uint		flags);
+int	  xfs_log_force(struct xfs_mount *mp, uint flags);
+int	  xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
+		int *log_forced);
 int	  xfs_log_mount(struct xfs_mount	*mp,
 			struct xfs_buftarg	*log_target,
 			xfs_daddr_t		start_block,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 43aa42a3a5d3..cb376ac8a595 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -202,7 +202,7 @@ xlog_cil_alloc_shadow_bufs(
 			 */
 			kmem_free(lip->li_lv_shadow);
 
-			lv = kmem_alloc(buf_size, KM_SLEEP|KM_NOFS);
+			lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS);
 			memset(lv, 0, xlog_cil_iovec_space(niovecs));
 
 			lv->lv_item = lip;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 00240c9ee72e..2b2383f1895e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3173,13 +3173,6 @@ xlog_recover_inode_pass2(
 	/* recover the log dinode inode into the on disk inode */
 	xfs_log_dinode_to_disk(ldip, dip);
 
-	/* the rest is in on-disk format */
-	if (item->ri_buf[1].i_len > isize) {
-		memcpy((char *)dip + isize,
-			item->ri_buf[1].i_addr + isize,
-			item->ri_buf[1].i_len - isize);
-	}
-
 	fields = in_f->ilf_fields;
 	if (fields & XFS_ILOG_DEV)
 		xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
@@ -3252,7 +3245,9 @@ xlog_recover_inode_pass2(
 	}
 
 out_owner_change:
-	if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
+	/* Recover the swapext owner change unless inode has been deleted */
+	if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
+	    (dip->di_mode != 0))
 		error = xfs_recover_inode_owner_change(mp, dip, in_f,
 						       buffer_list);
 	/* re-generate the checksum. */
@@ -3434,7 +3429,7 @@ xlog_recover_efi_pass2(
 	}
 	atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
 
-	spin_lock(&log->l_ailp->xa_lock);
+	spin_lock(&log->l_ailp->ail_lock);
 	/*
 	 * The EFI has two references. One for the EFD and one for EFI to ensure
 	 * it makes it into the AIL. Insert the EFI into the AIL directly and
@@ -3477,7 +3472,7 @@ xlog_recover_efd_pass2(
 	 * Search for the EFI with the id in the EFD format structure in the
 	 * AIL.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 	while (lip != NULL) {
 		if (lip->li_type == XFS_LI_EFI) {
@@ -3487,9 +3482,9 @@ xlog_recover_efd_pass2(
 				 * Drop the EFD reference to the EFI. This
 				 * removes the EFI from the AIL and frees it.
 				 */
-				spin_unlock(&ailp->xa_lock);
+				spin_unlock(&ailp->ail_lock);
 				xfs_efi_release(efip);
-				spin_lock(&ailp->xa_lock);
+				spin_lock(&ailp->ail_lock);
 				break;
 			}
 		}
@@ -3497,7 +3492,7 @@ xlog_recover_efd_pass2(
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return 0;
 }
@@ -3530,7 +3525,7 @@ xlog_recover_rui_pass2(
 	}
 	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
 
-	spin_lock(&log->l_ailp->xa_lock);
+	spin_lock(&log->l_ailp->ail_lock);
 	/*
 	 * The RUI has two references. One for the RUD and one for RUI to ensure
 	 * it makes it into the AIL. Insert the RUI into the AIL directly and
@@ -3570,7 +3565,7 @@ xlog_recover_rud_pass2(
 	 * Search for the RUI with the id in the RUD format structure in the
 	 * AIL.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 	while (lip != NULL) {
 		if (lip->li_type == XFS_LI_RUI) {
@@ -3580,9 +3575,9 @@ xlog_recover_rud_pass2(
 				 * Drop the RUD reference to the RUI. This
 				 * removes the RUI from the AIL and frees it.
 				 */
-				spin_unlock(&ailp->xa_lock);
+				spin_unlock(&ailp->ail_lock);
 				xfs_rui_release(ruip);
-				spin_lock(&ailp->xa_lock);
+				spin_lock(&ailp->ail_lock);
 				break;
 			}
 		}
@@ -3590,7 +3585,7 @@ xlog_recover_rud_pass2(
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return 0;
 }
@@ -3646,7 +3641,7 @@ xlog_recover_cui_pass2(
 	}
 	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
 
-	spin_lock(&log->l_ailp->xa_lock);
+	spin_lock(&log->l_ailp->ail_lock);
 	/*
 	 * The CUI has two references. One for the CUD and one for CUI to ensure
 	 * it makes it into the AIL. Insert the CUI into the AIL directly and
@@ -3687,7 +3682,7 @@ xlog_recover_cud_pass2(
 	 * Search for the CUI with the id in the CUD format structure in the
 	 * AIL.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 	while (lip != NULL) {
 		if (lip->li_type == XFS_LI_CUI) {
@@ -3697,9 +3692,9 @@ xlog_recover_cud_pass2(
 				 * Drop the CUD reference to the CUI. This
 				 * removes the CUI from the AIL and frees it.
 				 */
-				spin_unlock(&ailp->xa_lock);
+				spin_unlock(&ailp->ail_lock);
 				xfs_cui_release(cuip);
-				spin_lock(&ailp->xa_lock);
+				spin_lock(&ailp->ail_lock);
 				break;
 			}
 		}
@@ -3707,7 +3702,7 @@ xlog_recover_cud_pass2(
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return 0;
 }
@@ -3765,7 +3760,7 @@ xlog_recover_bui_pass2(
 	}
 	atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
 
-	spin_lock(&log->l_ailp->xa_lock);
+	spin_lock(&log->l_ailp->ail_lock);
 	/*
 	 * The RUI has two references. One for the RUD and one for RUI to ensure
 	 * it makes it into the AIL. Insert the RUI into the AIL directly and
@@ -3806,7 +3801,7 @@ xlog_recover_bud_pass2(
 	 * Search for the BUI with the id in the BUD format structure in the
 	 * AIL.
 	 */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 	while (lip != NULL) {
 		if (lip->li_type == XFS_LI_BUI) {
@@ -3816,9 +3811,9 @@ xlog_recover_bud_pass2(
 				 * Drop the BUD reference to the BUI. This
 				 * removes the BUI from the AIL and frees it.
 				 */
-				spin_unlock(&ailp->xa_lock);
+				spin_unlock(&ailp->ail_lock);
 				xfs_bui_release(buip);
-				spin_lock(&ailp->xa_lock);
+				spin_lock(&ailp->ail_lock);
 				break;
 			}
 		}
@@ -3826,7 +3821,7 @@ xlog_recover_bud_pass2(
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return 0;
 }
@@ -4659,9 +4654,9 @@ xlog_recover_process_efi(
 	if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
 		return 0;
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	error = xfs_efi_recover(mp, efip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 
 	return error;
 }
@@ -4677,9 +4672,9 @@ xlog_recover_cancel_efi(
 
 	efip = container_of(lip, struct xfs_efi_log_item, efi_item);
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	xfs_efi_release(efip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 }
 
 /* Recover the RUI if necessary. */
@@ -4699,9 +4694,9 @@ xlog_recover_process_rui(
 	if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
 		return 0;
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	error = xfs_rui_recover(mp, ruip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 
 	return error;
 }
@@ -4717,9 +4712,9 @@ xlog_recover_cancel_rui(
 
 	ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	xfs_rui_release(ruip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 }
 
 /* Recover the CUI if necessary. */
@@ -4740,9 +4735,9 @@ xlog_recover_process_cui(
 	if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
 		return 0;
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	error = xfs_cui_recover(mp, cuip, dfops);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 
 	return error;
 }
@@ -4758,9 +4753,9 @@ xlog_recover_cancel_cui(
 
 	cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	xfs_cui_release(cuip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 }
 
 /* Recover the BUI if necessary. */
@@ -4781,9 +4776,9 @@ xlog_recover_process_bui(
 	if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
 		return 0;
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	error = xfs_bui_recover(mp, buip, dfops);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 
 	return error;
 }
@@ -4799,9 +4794,9 @@ xlog_recover_cancel_bui(
 
 	buip = container_of(lip, struct xfs_bui_log_item, bui_item);
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	xfs_bui_release(buip);
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 }
 
 /* Is this log item a deferred action intent? */
@@ -4889,7 +4884,7 @@ xlog_recover_process_intents(
 #endif
 
 	ailp = log->l_ailp;
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 #if defined(DEBUG) || defined(XFS_WARN)
 	last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
@@ -4943,7 +4938,7 @@ xlog_recover_process_intents(
 	}
 out:
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	if (error)
 		xfs_defer_cancel(&dfops);
 	else
@@ -4966,7 +4961,7 @@ xlog_recover_cancel_intents(
 	struct xfs_ail		*ailp;
 
 	ailp = log->l_ailp;
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
 	while (lip != NULL) {
 		/*
@@ -5000,7 +4995,7 @@ xlog_recover_cancel_intents(
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	return error;
 }
 
@@ -5127,16 +5122,9 @@ xlog_recover_process_iunlinks(
 	xfs_agino_t	agino;
 	int		bucket;
 	int		error;
-	uint		mp_dmevmask;
 
 	mp = log->l_mp;
 
-	/*
-	 * Prevent any DMAPI event from being sent while in this function.
-	 */
-	mp_dmevmask = mp->m_dmevmask;
-	mp->m_dmevmask = 0;
-
 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 		/*
 		 * Find the agi for this ag.
@@ -5172,8 +5160,6 @@ xlog_recover_process_iunlinks(
 		}
 		xfs_buf_rele(agibp);
 	}
-
-	mp->m_dmevmask = mp_dmevmask;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 98fd41cbb9e1..a901b86772f8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -803,8 +803,6 @@ xfs_mountfs(
 		 get_unaligned_be16(&sbp->sb_uuid.b[4]);
 	mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
 
-	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
-
 	error = xfs_da_mount(mp);
 	if (error) {
 		xfs_warn(mp, "Failed dir/attr init: %d", error);
@@ -819,8 +817,6 @@ xfs_mountfs(
 	/*
 	 * Allocate and initialize the per-ag data.
 	 */
-	spin_lock_init(&mp->m_perag_lock);
-	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
 	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
 	if (error) {
 		xfs_warn(mp, "Failed per-ag init: %d", error);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e0792d036be2..10b90bbc5162 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -138,7 +138,6 @@ typedef struct xfs_mount {
 	spinlock_t		m_perag_lock;	/* lock for m_perag_tree */
 	struct mutex		m_growlock;	/* growfs mutex */
 	int			m_fixedfsid[2];	/* unchanged for life of FS */
-	uint			m_dmevmask;	/* DMI events for this FS */
 	uint64_t		m_flags;	/* global mount flags */
 	bool			m_inotbt_nores; /* no per-AG finobt resv. */
 	int			m_ialloc_inos;	/* inodes in inode allocation */
@@ -326,8 +325,9 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 /* per-AG block reservation data structures*/
 enum xfs_ag_resv_type {
 	XFS_AG_RESV_NONE = 0,
-	XFS_AG_RESV_METADATA,
 	XFS_AG_RESV_AGFL,
+	XFS_AG_RESV_METADATA,
+	XFS_AG_RESV_RMAPBT,
 };
 
 struct xfs_ag_resv {
@@ -353,6 +353,7 @@ typedef struct xfs_perag {
 	char		pagi_inodeok;	/* The agi is ok for inodes */
 	uint8_t		pagf_levels[XFS_BTNUM_AGF];
 					/* # of levels in bno & cnt btree */
+	bool		pagf_agflreset; /* agfl requires reset before use */
 	uint32_t	pagf_flcount;	/* count of blocks in freelist */
 	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
 	xfs_extlen_t	pagf_longest;	/* longest free space */
@@ -391,8 +392,8 @@ typedef struct xfs_perag {
 
 	/* Blocks reserved for all kinds of metadata. */
 	struct xfs_ag_resv	pag_meta_resv;
-	/* Blocks reserved for just AGFL-based metadata. */
-	struct xfs_ag_resv	pag_agfl_resv;
+	/* Blocks reserved for the reverse mapping btree. */
+	struct xfs_ag_resv	pag_rmapbt_resv;
 
 	/* reference count */
 	uint8_t			pagf_refcount_level;
@@ -406,8 +407,8 @@ xfs_perag_resv(
 	switch (type) {
 	case XFS_AG_RESV_METADATA:
 		return &pag->pag_meta_resv;
-	case XFS_AG_RESV_AGFL:
-		return &pag->pag_agfl_resv;
+	case XFS_AG_RESV_RMAPBT:
+		return &pag->pag_rmapbt_resv;
 	default:
 		return NULL;
 	}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 270246943a06..cdbd342a5249 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -394,7 +394,7 @@ xfs_reflink_allocate_cow(
 
 retry:
 	ASSERT(xfs_is_reflink_inode(ip));
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	/*
 	 * Even if the extent is not shared we might have a preallocation for
@@ -668,7 +668,7 @@ xfs_reflink_cancel_cow_range(
 
 	/* Start a rolling transaction to remove the mappings */
 	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-			0, 0, 0, &tp);
+			0, 0, XFS_TRANS_NOFS, &tp);
 	if (error)
 		goto out;
 
@@ -741,7 +741,7 @@ xfs_reflink_end_cow(
 			(unsigned int)(end_fsb - offset_fsb),
 			XFS_DATA_FORK);
 	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-			resblks, 0, XFS_TRANS_RESERVE, &tp);
+			resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
 	if (error)
 		goto out;
 
@@ -762,10 +762,8 @@ xfs_reflink_end_cow(
 		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
 
 		/* Extent delete may have bumped ext forward */
-		if (!del.br_blockcount) {
-			xfs_iext_prev(ifp, &icur);
-			goto next_extent;
-		}
+		if (!del.br_blockcount)
+			goto prev_extent;
 
 		ASSERT(!isnullstartblock(got.br_startblock));
 
@@ -774,10 +772,8 @@ xfs_reflink_end_cow(
 		 * speculatively preallocated CoW extents that have been
 		 * allocated but have not yet been involved in a write.
 		 */
-		if (got.br_state == XFS_EXT_UNWRITTEN) {
-			xfs_iext_prev(ifp, &icur);
-			goto next_extent;
-		}
+		if (got.br_state == XFS_EXT_UNWRITTEN)
+			goto prev_extent;
 
 		/* Unmap the old blocks in the data fork. */
 		xfs_defer_init(&dfops, &firstfsb);
@@ -816,9 +812,12 @@ xfs_reflink_end_cow(
 		error = xfs_defer_finish(&tp, &dfops);
 		if (error)
 			goto out_defer;
-next_extent:
 		if (!xfs_iext_get_extent(ifp, &icur, &got))
 			break;
+		continue;
+prev_extent:
+		if (!xfs_iext_prev_extent(ifp, &icur, &got))
+			break;
 	}
 
 	error = xfs_trans_commit(tp);
@@ -1061,7 +1060,7 @@ xfs_reflink_ag_has_free_space(
 		return 0;
 
 	pag = xfs_perag_get(mp, agno);
-	if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) ||
+	if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) ||
 	    xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
 		error = -ENOSPC;
 	xfs_perag_put(pag);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 93588ea3d3d2..612c1d5348b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -972,7 +972,6 @@ xfs_fs_destroy_inode(
 	struct inode		*inode)
 {
 	struct xfs_inode	*ip = XFS_I(inode);
-	int			error;
 
 	trace_xfs_destroy_inode(ip);
 
@@ -980,14 +979,6 @@ xfs_fs_destroy_inode(
 	XFS_STATS_INC(ip->i_mount, vn_rele);
 	XFS_STATS_INC(ip->i_mount, vn_remove);
 
-	if (xfs_is_reflink_inode(ip)) {
-		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
-		if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
-			xfs_warn(ip->i_mount,
-"Error %d while evicting CoW blocks for inode %llu.",
-					error, ip->i_ino);
-	}
-
 	xfs_inactive(ip);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
@@ -1009,6 +1000,28 @@ xfs_fs_destroy_inode(
 	xfs_inode_set_reclaim_tag(ip);
 }
 
+static void
+xfs_fs_dirty_inode(
+	struct inode			*inode,
+	int				flag)
+{
+	struct xfs_inode		*ip = XFS_I(inode);
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_trans		*tp;
+
+	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+		return;
+	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
+		return;
+
+	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+		return;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+	xfs_trans_commit(tp);
+}
+
 /*
  * Slab object creation initialisation for the XFS inode.
  * This covers only the idempotent fields in the XFS inode;
@@ -1566,29 +1579,48 @@ xfs_destroy_percpu_counters(
 	percpu_counter_destroy(&mp->m_fdblocks);
 }
 
-STATIC int
-xfs_fs_fill_super(
-	struct super_block	*sb,
-	void			*data,
-	int			silent)
+static struct xfs_mount *
+xfs_mount_alloc(
+	struct super_block	*sb)
 {
-	struct inode		*root;
-	struct xfs_mount	*mp = NULL;
-	int			flags = 0, error = -ENOMEM;
+	struct xfs_mount	*mp;
 
 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
 	if (!mp)
-		goto out;
+		return NULL;
 
+	mp->m_super = sb;
 	spin_lock_init(&mp->m_sb_lock);
+	spin_lock_init(&mp->m_agirotor_lock);
+	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
+	spin_lock_init(&mp->m_perag_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
 	INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
 	INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
 	mp->m_kobj.kobject.kset = xfs_kset;
+	return mp;
+}
 
-	mp->m_super = sb;
+
+STATIC int
+xfs_fs_fill_super(
+	struct super_block	*sb,
+	void			*data,
+	int			silent)
+{
+	struct inode		*root;
+	struct xfs_mount	*mp = NULL;
+	int			flags = 0, error = -ENOMEM;
+
+	/*
+	 * allocate mp and do all low-level struct initializations before we
+	 * attach it to the super
+	 */
+	mp = xfs_mount_alloc(sb);
+	if (!mp)
+		goto out;
 	sb->s_fs_info = mp;
 
 	error = xfs_parseargs(mp, (char *)data);
@@ -1789,6 +1821,7 @@ xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
+	.dirty_inode		= xfs_fs_dirty_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 945de08af7ba..a982c0b623d0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1477,7 +1477,7 @@ TRACE_EVENT(xfs_extent_busy_trim,
 		  __entry->tlen)
 );
 
-TRACE_EVENT(xfs_agf,
+DECLARE_EVENT_CLASS(xfs_agf_class,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
 		 unsigned long caller_ip),
 	TP_ARGS(mp, agf, flags, caller_ip),
@@ -1533,6 +1533,13 @@ TRACE_EVENT(xfs_agf,
 		  __entry->longest,
 		  (void *)__entry->caller_ip)
 );
+#define DEFINE_AGF_EVENT(name) \
+DEFINE_EVENT(xfs_agf_class, name, \
+	TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \
+		 unsigned long caller_ip), \
+	TP_ARGS(mp, agf, flags, caller_ip))
+DEFINE_AGF_EVENT(xfs_agf);
+DEFINE_AGF_EVENT(xfs_agfl_reset);
 
 TRACE_EVENT(xfs_free_extent,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 86f92df32c42..d6d8f9d129a7 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -119,8 +119,11 @@ xfs_trans_dup(
 	/* We gave our writer reference to the new transaction */
 	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
 	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
+
+	ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
 	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
 	tp->t_blk_res = tp->t_blk_res_used;
+
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
@@ -344,13 +347,14 @@ xfs_trans_mod_sb(
 		break;
 	case XFS_TRANS_SB_FDBLOCKS:
 		/*
-		 * Track the number of blocks allocated in the
-		 * transaction.  Make sure it does not exceed the
-		 * number reserved.
+		 * Track the number of blocks allocated in the transaction.
+		 * Make sure it does not exceed the number reserved. If so,
+		 * shutdown as this can lead to accounting inconsistency.
 		 */
 		if (delta < 0) {
 			tp->t_blk_res_used += (uint)-delta;
-			ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
+			if (tp->t_blk_res_used > tp->t_blk_res)
+				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 		}
 		tp->t_fdblocks_delta += delta;
 		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
@@ -803,8 +807,8 @@ xfs_log_item_batch_insert(
 {
 	int	i;
 
-	spin_lock(&ailp->xa_lock);
-	/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
+	spin_lock(&ailp->ail_lock);
+	/* xfs_trans_ail_update_bulk drops ailp->ail_lock */
 	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
 
 	for (i = 0; i < nr_items; i++) {
@@ -847,9 +851,9 @@ xfs_trans_committed_bulk(
 	struct xfs_ail_cursor	cur;
 	int			i = 0;
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	/* unpin all the log items */
 	for (lv = log_vector; lv; lv = lv->lv_next ) {
@@ -869,7 +873,7 @@ xfs_trans_committed_bulk(
 		 * object into the AIL as we are in a shutdown situation.
 		 */
 		if (aborted) {
-			ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
+			ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
 			lip->li_ops->iop_unpin(lip, 1);
 			continue;
 		}
@@ -883,11 +887,11 @@ xfs_trans_committed_bulk(
 			 * not affect the AIL cursor the bulk insert path is
 			 * using.
 			 */
-			spin_lock(&ailp->xa_lock);
+			spin_lock(&ailp->ail_lock);
 			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
 				xfs_trans_ail_update(ailp, lip, item_lsn);
 			else
-				spin_unlock(&ailp->xa_lock);
+				spin_unlock(&ailp->ail_lock);
 			lip->li_ops->iop_unpin(lip, 0);
 			continue;
 		}
@@ -905,9 +909,9 @@ xfs_trans_committed_bulk(
 	if (i)
 		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 }
 
 /*
@@ -966,7 +970,7 @@ __xfs_trans_commit(
 	 * log out now and wait for it.
 	 */
 	if (sync) {
-		error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
+		error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
 		XFS_STATS_INC(mp, xs_trans_sync);
 	} else {
 		XFS_STATS_INC(mp, xs_trans_async);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index cef89f7127d3..d4a2445215e6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -40,7 +40,7 @@ xfs_ail_check(
 {
 	xfs_log_item_t	*prev_lip;
 
-	if (list_empty(&ailp->xa_ail))
+	if (list_empty(&ailp->ail_head))
 		return;
 
 	/*
@@ -48,11 +48,11 @@ xfs_ail_check(
 	 */
 	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
 	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->xa_ail)
+	if (&prev_lip->li_ail != &ailp->ail_head)
 		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
 
 	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->xa_ail)
+	if (&prev_lip->li_ail != &ailp->ail_head)
 		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
 
 
@@ -69,10 +69,10 @@ static xfs_log_item_t *
 xfs_ail_max(
 	struct xfs_ail  *ailp)
 {
-	if (list_empty(&ailp->xa_ail))
+	if (list_empty(&ailp->ail_head))
 		return NULL;
 
-	return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
+	return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
 }
 
 /*
@@ -84,7 +84,7 @@ xfs_ail_next(
 	struct xfs_ail  *ailp,
 	xfs_log_item_t  *lip)
 {
-	if (lip->li_ail.next == &ailp->xa_ail)
+	if (lip->li_ail.next == &ailp->ail_head)
 		return NULL;
 
 	return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
@@ -105,11 +105,11 @@ xfs_ail_min_lsn(
 	xfs_lsn_t	lsn = 0;
 	xfs_log_item_t	*lip;
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_ail_min(ailp);
 	if (lip)
 		lsn = lip->li_lsn;
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return lsn;
 }
@@ -124,11 +124,11 @@ xfs_ail_max_lsn(
 	xfs_lsn_t       lsn = 0;
 	xfs_log_item_t  *lip;
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	lip = xfs_ail_max(ailp);
 	if (lip)
 		lsn = lip->li_lsn;
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
 	return lsn;
 }
@@ -146,7 +146,7 @@ xfs_trans_ail_cursor_init(
 	struct xfs_ail_cursor	*cur)
 {
 	cur->item = NULL;
-	list_add_tail(&cur->list, &ailp->xa_cursors);
+	list_add_tail(&cur->list, &ailp->ail_cursors);
 }
 
 /*
@@ -194,7 +194,7 @@ xfs_trans_ail_cursor_clear(
 {
 	struct xfs_ail_cursor	*cur;
 
-	list_for_each_entry(cur, &ailp->xa_cursors, list) {
+	list_for_each_entry(cur, &ailp->ail_cursors, list) {
 		if (cur->item == lip)
 			cur->item = (struct xfs_log_item *)
 					((uintptr_t)cur->item | 1);
@@ -222,7 +222,7 @@ xfs_trans_ail_cursor_first(
 		goto out;
 	}
 
-	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
+	list_for_each_entry(lip, &ailp->ail_head, li_ail) {
 		if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
 			goto out;
 	}
@@ -241,7 +241,7 @@ __xfs_trans_ail_cursor_last(
 {
 	xfs_log_item_t		*lip;
 
-	list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+	list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
 		if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
 			return lip;
 	}
@@ -310,7 +310,7 @@ xfs_ail_splice(
 	if (lip)
 		list_splice(list, &lip->li_ail);
 	else
-		list_splice(list, &ailp->xa_ail);
+		list_splice(list, &ailp->ail_head);
 }
 
 /*
@@ -335,17 +335,17 @@ xfsaild_push_item(
 	 * If log item pinning is enabled, skip the push and track the item as
 	 * pinned. This can help induce head-behind-tail conditions.
 	 */
-	if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN))
+	if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
 		return XFS_ITEM_PINNED;
 
-	return lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
+	return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
 }
 
 static long
 xfsaild_push(
 	struct xfs_ail		*ailp)
 {
-	xfs_mount_t		*mp = ailp->xa_mount;
+	xfs_mount_t		*mp = ailp->ail_mount;
 	struct xfs_ail_cursor	cur;
 	xfs_log_item_t		*lip;
 	xfs_lsn_t		lsn;
@@ -360,30 +360,30 @@ xfsaild_push(
 	 * buffers the last time we ran, force the log first and wait for it
 	 * before pushing again.
 	 */
-	if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 &&
-	    (!list_empty_careful(&ailp->xa_buf_list) ||
+	if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
+	    (!list_empty_careful(&ailp->ail_buf_list) ||
 	     xfs_ail_min_lsn(ailp))) {
-		ailp->xa_log_flush = 0;
+		ailp->ail_log_flush = 0;
 
 		XFS_STATS_INC(mp, xs_push_ail_flush);
 		xfs_log_force(mp, XFS_LOG_SYNC);
 	}
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 
-	/* barrier matches the xa_target update in xfs_ail_push() */
+	/* barrier matches the ail_target update in xfs_ail_push() */
 	smp_rmb();
-	target = ailp->xa_target;
-	ailp->xa_target_prev = target;
+	target = ailp->ail_target;
+	ailp->ail_target_prev = target;
 
-	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
+	lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
 	if (!lip) {
 		/*
 		 * If the AIL is empty or our push has reached the end we are
 		 * done now.
 		 */
 		xfs_trans_ail_cursor_done(&cur);
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 		goto out_done;
 	}
 
@@ -404,7 +404,7 @@ xfsaild_push(
 			XFS_STATS_INC(mp, xs_push_ail_success);
 			trace_xfs_ail_push(lip);
 
-			ailp->xa_last_pushed_lsn = lsn;
+			ailp->ail_last_pushed_lsn = lsn;
 			break;
 
 		case XFS_ITEM_FLUSHING:
@@ -423,7 +423,7 @@ xfsaild_push(
 			trace_xfs_ail_flushing(lip);
 
 			flushing++;
-			ailp->xa_last_pushed_lsn = lsn;
+			ailp->ail_last_pushed_lsn = lsn;
 			break;
 
 		case XFS_ITEM_PINNED:
@@ -431,7 +431,7 @@ xfsaild_push(
 			trace_xfs_ail_pinned(lip);
 
 			stuck++;
-			ailp->xa_log_flush++;
+			ailp->ail_log_flush++;
 			break;
 		case XFS_ITEM_LOCKED:
 			XFS_STATS_INC(mp, xs_push_ail_locked);
@@ -468,10 +468,10 @@ xfsaild_push(
 		lsn = lip->li_lsn;
 	}
 	xfs_trans_ail_cursor_done(&cur);
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
-	if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
-		ailp->xa_log_flush++;
+	if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
+		ailp->ail_log_flush++;
 
 	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
 out_done:
@@ -481,7 +481,7 @@ out_done:
 		 * AIL before we start the next scan from the start of the AIL.
 		 */
 		tout = 50;
-		ailp->xa_last_pushed_lsn = 0;
+		ailp->ail_last_pushed_lsn = 0;
 	} else if (((stuck + flushing) * 100) / count > 90) {
 		/*
 		 * Either there is a lot of contention on the AIL or we are
@@ -494,7 +494,7 @@ out_done:
 		 * the restart to issue a log force to unpin the stuck items.
 		 */
 		tout = 20;
-		ailp->xa_last_pushed_lsn = 0;
+		ailp->ail_last_pushed_lsn = 0;
 	} else {
 		/*
 		 * Assume we have more work to do in a short while.
@@ -536,26 +536,26 @@ xfsaild(
 			break;
 		}
 
-		spin_lock(&ailp->xa_lock);
+		spin_lock(&ailp->ail_lock);
 
 		/*
 		 * Idle if the AIL is empty and we are not racing with a target
 		 * update. We check the AIL after we set the task to a sleep
-		 * state to guarantee that we either catch an xa_target update
+		 * state to guarantee that we either catch an ail_target update
 		 * or that a wake_up resets the state to TASK_RUNNING.
 		 * Otherwise, we run the risk of sleeping indefinitely.
 		 *
-		 * The barrier matches the xa_target update in xfs_ail_push().
+		 * The barrier matches the ail_target update in xfs_ail_push().
 		 */
 		smp_rmb();
 		if (!xfs_ail_min(ailp) &&
-		    ailp->xa_target == ailp->xa_target_prev) {
-			spin_unlock(&ailp->xa_lock);
+		    ailp->ail_target == ailp->ail_target_prev) {
+			spin_unlock(&ailp->ail_lock);
 			freezable_schedule();
 			tout = 0;
 			continue;
 		}
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 
 		if (tout)
 			freezable_schedule_timeout(msecs_to_jiffies(tout));
@@ -592,8 +592,8 @@ xfs_ail_push(
 	xfs_log_item_t	*lip;
 
 	lip = xfs_ail_min(ailp);
-	if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
-	    XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
+	if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
+	    XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
 		return;
 
 	/*
@@ -601,10 +601,10 @@ xfs_ail_push(
 	 * the XFS_AIL_PUSHING_BIT.
 	 */
 	smp_wmb();
-	xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
+	xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
 	smp_wmb();
 
-	wake_up_process(ailp->xa_task);
+	wake_up_process(ailp->ail_task);
 }
 
 /*
@@ -630,18 +630,18 @@ xfs_ail_push_all_sync(
 	struct xfs_log_item	*lip;
 	DEFINE_WAIT(wait);
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	while ((lip = xfs_ail_max(ailp)) != NULL) {
-		prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
-		ailp->xa_target = lip->li_lsn;
-		wake_up_process(ailp->xa_task);
-		spin_unlock(&ailp->xa_lock);
+		prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
+		ailp->ail_target = lip->li_lsn;
+		wake_up_process(ailp->ail_task);
+		spin_unlock(&ailp->ail_lock);
 		schedule();
-		spin_lock(&ailp->xa_lock);
+		spin_lock(&ailp->ail_lock);
 	}
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 
-	finish_wait(&ailp->xa_empty, &wait);
+	finish_wait(&ailp->ail_empty, &wait);
 }
 
 /*
@@ -672,7 +672,7 @@ xfs_trans_ail_update_bulk(
 	struct xfs_ail_cursor	*cur,
 	struct xfs_log_item	**log_items,
 	int			nr_items,
-	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
+	xfs_lsn_t		lsn) __releases(ailp->ail_lock)
 {
 	xfs_log_item_t		*mlip;
 	int			mlip_changed = 0;
@@ -705,13 +705,13 @@ xfs_trans_ail_update_bulk(
 		xfs_ail_splice(ailp, cur, &tmp, lsn);
 
 	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-			xlog_assign_tail_lsn_locked(ailp->xa_mount);
-		spin_unlock(&ailp->xa_lock);
+		if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+			xlog_assign_tail_lsn_locked(ailp->ail_mount);
+		spin_unlock(&ailp->ail_lock);
 
-		xfs_log_space_wake(ailp->xa_mount);
+		xfs_log_space_wake(ailp->ail_mount);
 	} else {
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 	}
 }
 
@@ -756,13 +756,13 @@ void
 xfs_trans_ail_delete(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
-	int			shutdown_type) __releases(ailp->xa_lock)
+	int			shutdown_type) __releases(ailp->ail_lock)
 {
-	struct xfs_mount	*mp = ailp->xa_mount;
+	struct xfs_mount	*mp = ailp->ail_mount;
 	bool			mlip_changed;
 
 	if (!(lip->li_flags & XFS_LI_IN_AIL)) {
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 		if (!XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
 	"%s: attempting to delete a log item that is not in the AIL",
@@ -776,13 +776,13 @@ xfs_trans_ail_delete(
 	if (mlip_changed) {
 		if (!XFS_FORCED_SHUTDOWN(mp))
 			xlog_assign_tail_lsn_locked(mp);
-		if (list_empty(&ailp->xa_ail))
-			wake_up_all(&ailp->xa_empty);
+		if (list_empty(&ailp->ail_head))
+			wake_up_all(&ailp->ail_empty);
 	}
 
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 	if (mlip_changed)
-		xfs_log_space_wake(ailp->xa_mount);
+		xfs_log_space_wake(ailp->ail_mount);
 }
 
 int
@@ -795,16 +795,16 @@ xfs_trans_ail_init(
 	if (!ailp)
 		return -ENOMEM;
 
-	ailp->xa_mount = mp;
-	INIT_LIST_HEAD(&ailp->xa_ail);
-	INIT_LIST_HEAD(&ailp->xa_cursors);
-	spin_lock_init(&ailp->xa_lock);
-	INIT_LIST_HEAD(&ailp->xa_buf_list);
-	init_waitqueue_head(&ailp->xa_empty);
+	ailp->ail_mount = mp;
+	INIT_LIST_HEAD(&ailp->ail_head);
+	INIT_LIST_HEAD(&ailp->ail_cursors);
+	spin_lock_init(&ailp->ail_lock);
+	INIT_LIST_HEAD(&ailp->ail_buf_list);
+	init_waitqueue_head(&ailp->ail_empty);
 
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-			ailp->xa_mount->m_fsname);
-	if (IS_ERR(ailp->xa_task))
+	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+			ailp->ail_mount->m_fsname);
+	if (IS_ERR(ailp->ail_task))
 		goto out_free_ailp;
 
 	mp->m_ail = ailp;
@@ -821,6 +821,6 @@ xfs_trans_ail_destroy(
 {
 	struct xfs_ail	*ailp = mp->m_ail;
 
-	kthread_stop(ailp->xa_task);
+	kthread_stop(ailp->ail_task);
 	kmem_free(ailp);
 }
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 653ce379d36b..a5d9dfc45d98 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -431,8 +431,8 @@ xfs_trans_brelse(
 	 * If the fs has shutdown and we dropped the last reference, it may fall
 	 * on us to release a (possibly dirty) bli if it never made it to the
 	 * AIL (e.g., the aborted unpin already happened and didn't release it
-	 * due to our reference). Since we're already shutdown and need xa_lock,
-	 * just force remove from the AIL and release the bli here.
+	 * due to our reference). Since we're already shutdown and need
+	 * ail_lock, just force remove from the AIL and release the bli here.
 	 */
 	if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
 		xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4a89da4b6fe7..07cea592dc01 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -98,10 +98,24 @@ xfs_trans_log_inode(
 	xfs_inode_t	*ip,
 	uint		flags)
 {
+	struct inode	*inode = VFS_I(ip);
+
 	ASSERT(ip->i_itemp != NULL);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
 	/*
+	 * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
+	 * don't matter - we either will need an extra transaction in 24 hours
+	 * to log the timestamps, or will clear already cleared fields in the
+	 * worst case.
+	 */
+	if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+		spin_lock(&inode->i_lock);
+		inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+		spin_unlock(&inode->i_lock);
+	}
+
+	/*
 	 * Record the specific change for fdatasync optimisation. This
 	 * allows fdatasync to skip log forces for inodes that are only
 	 * timestamp dirty. We do this before the change count so that
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index b317a3644c00..be24b0c8a332 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,17 +65,17 @@ struct xfs_ail_cursor {
  * Eventually we need to drive the locking in here as well.
  */
 struct xfs_ail {
-	struct xfs_mount	*xa_mount;
-	struct task_struct	*xa_task;
-	struct list_head	xa_ail;
-	xfs_lsn_t		xa_target;
-	xfs_lsn_t		xa_target_prev;
-	struct list_head	xa_cursors;
-	spinlock_t		xa_lock;
-	xfs_lsn_t		xa_last_pushed_lsn;
-	int			xa_log_flush;
-	struct list_head	xa_buf_list;
-	wait_queue_head_t	xa_empty;
+	struct xfs_mount	*ail_mount;
+	struct task_struct	*ail_task;
+	struct list_head	ail_head;
+	xfs_lsn_t		ail_target;
+	xfs_lsn_t		ail_target_prev;
+	struct list_head	ail_cursors;
+	spinlock_t		ail_lock;
+	xfs_lsn_t		ail_last_pushed_lsn;
+	int			ail_log_flush;
+	struct list_head	ail_buf_list;
+	wait_queue_head_t	ail_empty;
 };
 
 /*
@@ -84,7 +84,7 @@ struct xfs_ail {
 void	xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
 				struct xfs_ail_cursor *cur,
 				struct xfs_log_item **log_items, int nr_items,
-				xfs_lsn_t lsn) __releases(ailp->xa_lock);
+				xfs_lsn_t lsn) __releases(ailp->ail_lock);
 /*
  * Return a pointer to the first item in the AIL.  If the AIL is empty, then
  * return NULL.
@@ -93,7 +93,7 @@ static inline struct xfs_log_item *
 xfs_ail_min(
 	struct xfs_ail  *ailp)
 {
-	return list_first_entry_or_null(&ailp->xa_ail, struct xfs_log_item,
+	return list_first_entry_or_null(&ailp->ail_head, struct xfs_log_item,
 					li_ail);
 }
 
@@ -101,14 +101,14 @@ static inline void
 xfs_trans_ail_update(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
-	xfs_lsn_t		lsn) __releases(ailp->xa_lock)
+	xfs_lsn_t		lsn) __releases(ailp->ail_lock)
 {
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }
 
 bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
 void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
-		int shutdown_type) __releases(ailp->xa_lock);
+		int shutdown_type) __releases(ailp->ail_lock);
 
 static inline void
 xfs_trans_ail_remove(
@@ -117,12 +117,12 @@ xfs_trans_ail_remove(
 {
 	struct xfs_ail		*ailp = lip->li_ailp;
 
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	/* xfs_trans_ail_delete() drops the AIL lock */
 	if (lip->li_flags & XFS_LI_IN_AIL)
 		xfs_trans_ail_delete(ailp, lip, shutdown_type);
 	else
-		spin_unlock(&ailp->xa_lock);
+		spin_unlock(&ailp->ail_lock);
 }
 
 void			xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
@@ -149,9 +149,9 @@ xfs_trans_ail_copy_lsn(
 	xfs_lsn_t	*src)
 {
 	ASSERT(sizeof(xfs_lsn_t) == 8);	/* don't lock if it shrinks */
-	spin_lock(&ailp->xa_lock);
+	spin_lock(&ailp->ail_lock);
 	*dst = *src;
-	spin_unlock(&ailp->xa_lock);
+	spin_unlock(&ailp->ail_lock);
 }
 #else
 static inline void
@@ -172,7 +172,7 @@ xfs_clear_li_failed(
 	struct xfs_buf	*bp = lip->li_buf;
 
 	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-	lockdep_assert_held(&lip->li_ailp->xa_lock);
+	lockdep_assert_held(&lip->li_ailp->ail_lock);
 
 	if (lip->li_flags & XFS_LI_FAILED) {
 		lip->li_flags &= ~XFS_LI_FAILED;
@@ -186,7 +186,7 @@ xfs_set_li_failed(
 	struct xfs_log_item	*lip,
 	struct xfs_buf		*bp)
 {
-	lockdep_assert_held(&lip->li_ailp->xa_lock);
+	lockdep_assert_held(&lip->li_ailp->ail_lock);
 
 	if (!(lip->li_flags & XFS_LI_FAILED)) {
 		xfs_buf_hold(bp);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8add3493a202..278841c75b97 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -599,7 +599,6 @@
 	IRQCHIP_OF_MATCH_TABLE()					\
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(timer)						\
-	ACPI_PROBE_TABLE(iort)						\
 	EARLYCON_TABLE()
 
 #define INIT_TEXT							\
diff --git a/include/crypto/ablk_helper.h b/include/crypto/ablk_helper.h
deleted file mode 100644
index 4e655c2a4e15..000000000000
--- a/include/crypto/ablk_helper.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Shared async block cipher helpers
- */
-
-#ifndef _CRYPTO_ABLK_HELPER_H
-#define _CRYPTO_ABLK_HELPER_H
-
-#include <linux/crypto.h>
-#include <linux/kernel.h>
-#include <crypto/cryptd.h>
-
-struct async_helper_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
-extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len);
-
-extern int __ablk_encrypt(struct ablkcipher_request *req);
-
-extern int ablk_encrypt(struct ablkcipher_request *req);
-
-extern int ablk_decrypt(struct ablkcipher_request *req);
-
-extern void ablk_exit(struct crypto_tfm *tfm);
-
-extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
-
-extern int ablk_init(struct crypto_tfm *tfm);
-
-#endif /* _CRYPTO_ABLK_HELPER_H */
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index e3cebf640c00..1aba888241dd 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -30,7 +30,6 @@ struct crypto_type {
 	int (*init_tfm)(struct crypto_tfm *tfm);
 	void (*show)(struct seq_file *m, struct crypto_alg *alg);
 	int (*report)(struct sk_buff *skb, struct crypto_alg *alg);
-	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask);
 	void (*free)(struct crypto_instance *inst);
 
 	unsigned int type;
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index dd04c1699b51..1cbec29af3d6 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -17,7 +17,10 @@
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <crypto/algapi.h>
+#include <crypto/aead.h>
+#include <crypto/akcipher.h>
 #include <crypto/hash.h>
+#include <crypto/skcipher.h>
 
 #define ENGINE_NAME_LEN	30
 /*
@@ -37,12 +40,6 @@
  * @unprepare_crypt_hardware: there are currently no more requests on the
  * queue so the subsystem notifies the driver that it may relax the
  * hardware by issuing this call
- * @prepare_cipher_request: do some prepare if need before handle the current request
- * @unprepare_cipher_request: undo any work done by prepare_cipher_request()
- * @cipher_one_request: do encryption for current request
- * @prepare_hash_request: do some prepare if need before handle the current request
- * @unprepare_hash_request: undo any work done by prepare_hash_request()
- * @hash_one_request: do hash for current request
  * @kworker: kthread worker struct for request pump
  * @pump_requests: work struct for scheduling work to the request pump
  * @priv_data: the engine private data
@@ -65,19 +62,6 @@ struct crypto_engine {
 	int (*prepare_crypt_hardware)(struct crypto_engine *engine);
 	int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
 
-	int (*prepare_cipher_request)(struct crypto_engine *engine,
-				      struct ablkcipher_request *req);
-	int (*unprepare_cipher_request)(struct crypto_engine *engine,
-					struct ablkcipher_request *req);
-	int (*prepare_hash_request)(struct crypto_engine *engine,
-				    struct ahash_request *req);
-	int (*unprepare_hash_request)(struct crypto_engine *engine,
-				      struct ahash_request *req);
-	int (*cipher_one_request)(struct crypto_engine *engine,
-				  struct ablkcipher_request *req);
-	int (*hash_one_request)(struct crypto_engine *engine,
-				struct ahash_request *req);
-
 	struct kthread_worker           *kworker;
 	struct kthread_work             pump_requests;
 
@@ -85,19 +69,45 @@ struct crypto_engine {
 	struct crypto_async_request	*cur_req;
 };
 
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
-				   struct ablkcipher_request *req,
-				   bool need_pump);
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
-					     struct ablkcipher_request *req);
-int crypto_transfer_hash_request(struct crypto_engine *engine,
-				 struct ahash_request *req, bool need_pump);
+/*
+ * struct crypto_engine_op - crypto hardware engine operations
+ * @prepare__request: do some prepare if need before handle the current request
+ * @unprepare_request: undo any work done by prepare_request()
+ * @do_one_request: do encryption for current request
+ */
+struct crypto_engine_op {
+	int (*prepare_request)(struct crypto_engine *engine,
+			       void *areq);
+	int (*unprepare_request)(struct crypto_engine *engine,
+				 void *areq);
+	int (*do_one_request)(struct crypto_engine *engine,
+			      void *areq);
+};
+
+struct crypto_engine_ctx {
+	struct crypto_engine_op op;
+};
+
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+						 struct ablkcipher_request *req);
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+					   struct aead_request *req);
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+					       struct akcipher_request *req);
 int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
-					   struct ahash_request *req);
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
-				    struct ablkcipher_request *req, int err);
+					       struct ahash_request *req);
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+					       struct skcipher_request *req);
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+					struct ablkcipher_request *req, int err);
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+				  struct aead_request *req, int err);
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+				      struct akcipher_request *req, int err);
 void crypto_finalize_hash_request(struct crypto_engine *engine,
 				  struct ahash_request *req, int err);
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+				      struct skcipher_request *req, int err);
 int crypto_engine_start(struct crypto_engine *engine);
 int crypto_engine_stop(struct crypto_engine *engine);
 struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 2d1849dffb80..76e432cab75d 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -74,7 +74,8 @@ struct ahash_request {
  * @init: **[mandatory]** Initialize the transformation context. Intended only to initialize the
  *	  state of the HASH transformation at the beginning. This shall fill in
  *	  the internal structures used during the entire duration of the whole
- *	  transformation. No data processing happens at this point.
+ *	  transformation. No data processing happens at this point. Driver code
+ *	  implementation must not use req->result.
  * @update: **[mandatory]** Push a chunk of data into the driver for transformation. This
  *	   function actually pushes blocks of data from upper layers into the
  *	   driver, which then passes those to the hardware as seen fit. This
@@ -83,7 +84,8 @@ struct ahash_request {
  *	   transformation. This function shall not modify the transformation
  *	   context, as this function may be called in parallel with the same
  *	   transformation object. Data processing can happen synchronously
- *	   [SHASH] or asynchronously [AHASH] at this point.
+ *	   [SHASH] or asynchronously [AHASH] at this point. Driver must not use
+ *	   req->result.
  * @final: **[mandatory]** Retrieve result from the driver. This function finalizes the
  *	   transformation and retrieves the resulting hash from the driver and
  *	   pushes it back to upper layers. No data processing happens at this
@@ -120,11 +122,12 @@ struct ahash_request {
  *	    you want to save partial result of the transformation after
  *	    processing certain amount of data and reload this partial result
  *	    multiple times later on for multiple re-use. No data processing
- *	    happens at this point.
+ *	    happens at this point. Driver must not use req->result.
  * @import: Import partial state of the transformation. This function loads the
  *	    entire state of the ongoing transformation from a provided block of
  *	    data so the transformation can continue from this point onward. No
- *	    data processing happens at this point.
+ *	    data processing happens at this point. Driver must not use
+ *	    req->result.
  * @halg: see struct hash_alg_common
  */
 struct ahash_alg {
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 27040a46d50a..a0b0ad9d585e 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -126,11 +126,6 @@ int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc);
 int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc);
 int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc);
 
-int ahash_mcryptd_update(struct ahash_request *desc);
-int ahash_mcryptd_final(struct ahash_request *desc);
-int ahash_mcryptd_finup(struct ahash_request *desc);
-int ahash_mcryptd_digest(struct ahash_request *desc);
-
 int crypto_init_shash_ops_async(struct crypto_tfm *tfm);
 
 static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm)
diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h
index 32ceb6929885..f18344518e32 100644
--- a/include/crypto/internal/simd.h
+++ b/include/crypto/internal/simd.h
@@ -7,6 +7,7 @@
 #define _CRYPTO_INTERNAL_SIMD_H
 
 struct simd_skcipher_alg;
+struct skcipher_alg;
 
 struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
 						      const char *drvname,
@@ -15,4 +16,10 @@ struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
 					       const char *basename);
 void simd_skcipher_free(struct simd_skcipher_alg *alg);
 
+int simd_register_skciphers_compat(struct skcipher_alg *algs, int count,
+				   struct simd_skcipher_alg **simd_algs);
+
+void simd_unregister_skciphers(struct skcipher_alg *algs, int count,
+			       struct simd_skcipher_alg **simd_algs);
+
 #endif /* _CRYPTO_INTERNAL_SIMD_H */
diff --git a/include/crypto/lrw.h b/include/crypto/lrw.h
deleted file mode 100644
index a9d44c06d081..000000000000
--- a/include/crypto/lrw.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CRYPTO_LRW_H
-#define _CRYPTO_LRW_H
-
-#include <crypto/b128ops.h>
-
-struct scatterlist;
-struct gf128mul_64k;
-struct blkcipher_desc;
-
-#define LRW_BLOCK_SIZE 16
-
-struct lrw_table_ctx {
-	/* optimizes multiplying a random (non incrementing, as at the
-	 * start of a new sector) value with key2, we could also have
-	 * used 4k optimization tables or no optimization at all. In the
-	 * latter case we would have to store key2 here */
-	struct gf128mul_64k *table;
-	/* stores:
-	 *  key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 },
-	 *  key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 }
-	 *  key2*{ 0,0,...1,1,1,1,1 }, etc
-	 * needed for optimized multiplication of incrementing values
-	 * with key2 */
-	be128 mulinc[128];
-};
-
-int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak);
-void lrw_free_table(struct lrw_table_ctx *ctx);
-
-struct lrw_crypt_req {
-	be128 *tbuf;
-	unsigned int tbuflen;
-
-	struct lrw_table_ctx *table_ctx;
-	void *crypt_ctx;
-	void (*crypt_fn)(void *ctx, u8 *blks, unsigned int nbytes);
-};
-
-int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-	      struct scatterlist *src, unsigned int nbytes,
-	      struct lrw_crypt_req *req);
-
-#endif  /* _CRYPTO_LRW_H */
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
new file mode 100644
index 000000000000..b64e64d20b28
--- /dev/null
+++ b/include/crypto/sm4.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Common values for the SM4 algorithm
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ */
+
+#ifndef _CRYPTO_SM4_H
+#define _CRYPTO_SM4_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define SM4_KEY_SIZE	16
+#define SM4_BLOCK_SIZE	16
+#define SM4_RKEY_WORDS	32
+
+struct crypto_sm4_ctx {
+	u32 rkey_enc[SM4_RKEY_WORDS];
+	u32 rkey_dec[SM4_RKEY_WORDS];
+};
+
+int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len);
+int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len);
+
+#endif
diff --git a/include/crypto/speck.h b/include/crypto/speck.h
new file mode 100644
index 000000000000..73cfc952d405
--- /dev/null
+++ b/include/crypto/speck.h
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common values for the Speck algorithm
+ */
+
+#ifndef _CRYPTO_SPECK_H
+#define _CRYPTO_SPECK_H
+
+#include <linux/types.h>
+
+/* Speck128 */
+
+#define SPECK128_BLOCK_SIZE	16
+
+#define SPECK128_128_KEY_SIZE	16
+#define SPECK128_128_NROUNDS	32
+
+#define SPECK128_192_KEY_SIZE	24
+#define SPECK128_192_NROUNDS	33
+
+#define SPECK128_256_KEY_SIZE	32
+#define SPECK128_256_NROUNDS	34
+
+struct speck128_tfm_ctx {
+	u64 round_keys[SPECK128_256_NROUNDS];
+	int nrounds;
+};
+
+void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in);
+
+void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in);
+
+int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
+			   unsigned int keysize);
+
+/* Speck64 */
+
+#define SPECK64_BLOCK_SIZE	8
+
+#define SPECK64_96_KEY_SIZE	12
+#define SPECK64_96_NROUNDS	26
+
+#define SPECK64_128_KEY_SIZE	16
+#define SPECK64_128_NROUNDS	27
+
+struct speck64_tfm_ctx {
+	u32 round_keys[SPECK64_128_NROUNDS];
+	int nrounds;
+};
+
+void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in);
+
+void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in);
+
+int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
+			  unsigned int keysize);
+
+#endif /* _CRYPTO_SPECK_H */
diff --git a/include/crypto/xts.h b/include/crypto/xts.h
index 322aab6e78a7..34d94c95445a 100644
--- a/include/crypto/xts.h
+++ b/include/crypto/xts.h
@@ -6,27 +6,10 @@
 #include <crypto/internal/skcipher.h>
 #include <linux/fips.h>
 
-struct scatterlist;
-struct blkcipher_desc;
-
 #define XTS_BLOCK_SIZE 16
 
-struct xts_crypt_req {
-	le128 *tbuf;
-	unsigned int tbuflen;
-
-	void *tweak_ctx;
-	void (*tweak_fn)(void *ctx, u8* dst, const u8* src);
-	void *crypt_ctx;
-	void (*crypt_fn)(void *ctx, u8 *blks, unsigned int nbytes);
-};
-
 #define XTS_TWEAK_CAST(x) ((void (*)(void *, u8*, const u8*))(x))
 
-int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-	      struct scatterlist *src, unsigned int nbytes,
-	      struct xts_crypt_req *req);
-
 static inline int xts_check_key(struct crypto_tfm *tfm,
 				const u8 *key, unsigned int keylen)
 {
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 451aaa0786ae..4b13e0a3e15b 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -156,6 +156,23 @@ static inline void le64_add_cpu(__le64 *var, u64 val)
 	*var = cpu_to_le64(le64_to_cpu(*var) + val);
 }
 
+/* XXX: this stuff can be optimized */
+static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
+{
+	while (words--) {
+		__le32_to_cpus(buf);
+		buf++;
+	}
+}
+
+static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
+{
+	while (words--) {
+		__cpu_to_le32s(buf);
+		buf++;
+	}
+}
+
 static inline void be16_add_cpu(__be16 *var, u16 val)
 {
 	*var = cpu_to_be16(be16_to_cpu(*var) + val);
diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
index 357ae4611a45..bd21af828ff6 100644
--- a/include/linux/crc32c.h
+++ b/include/linux/crc32c.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 
 extern u32 crc32c(u32 crc, const void *address, unsigned int length);
+extern const char *crc32c_impl(void);
 
 /* This macro exists for backwards-compatibility. */
 #define crc32c_le crc32c
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 7e6e84cf6383..6eb06101089f 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -435,6 +435,14 @@ struct compress_alg {
  * @cra_exit: Deinitialize the cryptographic transformation object. This is a
  *	      counterpart to @cra_init, used to remove various changes set in
  *	      @cra_init.
+ * @cra_u.ablkcipher: Union member which contains an asynchronous block cipher
+ *		      definition. See @struct @ablkcipher_alg.
+ * @cra_u.blkcipher: Union member which contains a synchronous block cipher
+ * 		     definition See @struct @blkcipher_alg.
+ * @cra_u.cipher: Union member which contains a single-block symmetric cipher
+ *		  definition. See @struct @cipher_alg.
+ * @cra_u.compress: Union member which contains a (de)compression algorithm.
+ *		    See @struct @compress_alg.
  * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE
  * @cra_list: internally used
  * @cra_users: internally used
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 82a99d366aec..94acbde17bb1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -56,9 +56,7 @@ struct qstr {
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
 
-extern const char empty_string[];
 extern const struct qstr empty_name;
-extern const char slash_string[];
 extern const struct qstr slash_name;
 
 struct dentry_stat_t {
@@ -361,7 +359,7 @@ static inline void dont_mount(struct dentry *dentry)
 
 extern void __d_lookup_done(struct dentry *);
 
-static inline int d_in_lookup(struct dentry *dentry)
+static inline int d_in_lookup(const struct dentry *dentry)
 {
 	return dentry->d_flags & DCACHE_PAR_LOOKUP;
 }
@@ -489,7 +487,7 @@ static inline bool d_really_is_positive(const struct dentry *dentry)
 	return dentry->d_inode != NULL;
 }
 
-static inline int simple_positive(struct dentry *dentry)
+static inline int simple_positive(const struct dentry *dentry)
 {
 	return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index c7902ca7c9f4..78f456fcd242 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -161,11 +161,9 @@ struct hrtimer_clock_base {
 enum  hrtimer_base_type {
 	HRTIMER_BASE_MONOTONIC,
 	HRTIMER_BASE_REALTIME,
-	HRTIMER_BASE_BOOTTIME,
 	HRTIMER_BASE_TAI,
 	HRTIMER_BASE_MONOTONIC_SOFT,
 	HRTIMER_BASE_REALTIME_SOFT,
-	HRTIMER_BASE_BOOTTIME_SOFT,
 	HRTIMER_BASE_TAI_SOFT,
 	HRTIMER_MAX_CLOCK_BASES,
 };
diff --git a/include/linux/irq.h b/include/linux/irq.h
index a0231e96a578..65916a305f3d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -10,18 +10,13 @@
  * Thanks. --rmk
  */
 
-#include <linux/smp.h>
-#include <linux/linkage.h>
 #include <linux/cache.h>
 #include <linux/spinlock.h>
 #include <linux/cpumask.h>
-#include <linux/gfp.h>
 #include <linux/irqhandler.h>
 #include <linux/irqreturn.h>
 #include <linux/irqnr.h>
-#include <linux/errno.h>
 #include <linux/topology.h>
-#include <linux/wait.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 
@@ -1170,4 +1165,22 @@ int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest);
 int ipi_send_single(unsigned int virq, unsigned int cpu);
 int ipi_send_mask(unsigned int virq, const struct cpumask *dest);
 
+#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
+/*
+ * Registers a generic IRQ handling function as the top-level IRQ handler in
+ * the system, which is generally the first C code called from an assembly
+ * architecture-specific interrupt handler.
+ *
+ * Returns 0 on success, or -EBUSY if an IRQ handler has already been
+ * registered.
+ */
+int __init set_handle_irq(void (*handle_irq)(struct pt_regs *));
+
+/*
+ * Allows interrupt handlers to find the irqchip that's been registered as the
+ * top-level IRQ handler.
+ */
+extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
+#endif
+
 #endif /* _LINUX_IRQ_H */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index b26eccc78fb1..f5af3b594e6e 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -106,6 +106,7 @@
 #define GICR_PIDR2			GICD_PIDR2
 
 #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
+#define GICR_CTLR_RWP			(1UL << 3)
 
 #define GICR_TYPER_CPU_NUMBER(r)	(((r) >> 8) & 0xffff)
 
@@ -312,7 +313,8 @@
 #define GITS_TYPER_DEVBITS_SHIFT	13
 #define GITS_TYPER_DEVBITS(r)		((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA			(1UL << 19)
-#define GITS_TYPER_HWCOLLCNT_SHIFT	24
+#define GITS_TYPER_HCC_SHIFT		24
+#define GITS_TYPER_HCC(r)		(((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
 #define GITS_TYPER_VMOVP		(1ULL << 37)
 
 #define GITS_IIDR_REV_SHIFT		12
diff --git a/include/linux/random.h b/include/linux/random.h
index 4024f7d9c77d..2ddf13b4281e 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -85,10 +85,8 @@ static inline unsigned long get_random_canary(void)
 static inline int get_random_bytes_wait(void *buf, int nbytes)
 {
 	int ret = wait_for_random_bytes();
-	if (unlikely(ret))
-		return ret;
 	get_random_bytes(buf, nbytes);
-	return 0;
+	return ret;
 }
 
 #define declare_get_random_var_wait(var) \
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
index ce3e8150c174..fbde0bc7e882 100644
--- a/include/linux/sizes.h
+++ b/include/linux/sizes.h
@@ -8,6 +8,8 @@
 #ifndef __LINUX_SIZES_H__
 #define __LINUX_SIZES_H__
 
+#include <linux/const.h>
+
 #define SZ_1				0x00000001
 #define SZ_2				0x00000002
 #define SZ_4				0x00000004
@@ -44,4 +46,6 @@
 #define SZ_1G				0x40000000
 #define SZ_2G				0x80000000
 
+#define SZ_4G				_AC(0x100000000, ULL)
+
 #endif /* __LINUX_SIZES_H__ */
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 65b1de25198d..d2bcd4377b56 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -217,5 +217,6 @@ static inline s64 timeval_to_ns(const struct timeval *tv)
  * Returns the timeval representation of the nsec parameter.
  */
 extern struct timeval ns_to_timeval(const s64 nsec);
+extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
 #endif
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index d315c3d6725c..4b3dca173e89 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -52,6 +52,7 @@ struct tk_read_base {
  * @offs_real:		Offset clock monotonic -> clock realtime
  * @offs_boot:		Offset clock monotonic -> clock boottime
  * @offs_tai:		Offset clock monotonic -> clock tai
+ * @time_suspended:	Accumulated suspend time
  * @tai_offset:		The current UTC to TAI offset in seconds
  * @clock_was_set_seq:	The sequence number of clock was set events
  * @cs_was_changed_seq:	The sequence number of clocksource change events
@@ -94,6 +95,7 @@ struct timekeeper {
 	ktime_t			offs_real;
 	ktime_t			offs_boot;
 	ktime_t			offs_tai;
+	ktime_t			time_suspended;
 	s32			tai_offset;
 	unsigned int		clock_was_set_seq;
 	u8			cs_was_changed_seq;
@@ -117,6 +119,8 @@ struct timekeeper {
 	s64			ntp_error;
 	u32			ntp_error_shift;
 	u32			ntp_err_mult;
+	/* Flag used to avoid updating NTP twice with same second */
+	u32			skip_second_overflow;
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 	long			last_warning;
 	/*
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index b17bcce58bc4..82c219dfd3bb 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -32,20 +32,25 @@ extern void getrawmonotonic64(struct timespec64 *ts);
 extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
+extern void ktime_get_active_ts64(struct timespec64 *ts);
 
 extern int __getnstimeofday64(struct timespec64 *tv);
 extern void getnstimeofday64(struct timespec64 *tv);
 extern void getboottime64(struct timespec64 *ts);
 
-#define ktime_get_real_ts64(ts)	getnstimeofday64(ts)
+#define ktime_get_real_ts64(ts)		getnstimeofday64(ts)
+
+/* Clock BOOTTIME compatibility wrappers */
+static inline void get_monotonic_boottime64(struct timespec64 *ts)
+{
+	ktime_get_ts64(ts);
+}
 
 /*
  * ktime_t based interfaces
  */
-
 enum tk_offsets {
 	TK_OFFS_REAL,
-	TK_OFFS_BOOT,
 	TK_OFFS_TAI,
 	TK_OFFS_MAX,
 };
@@ -56,6 +61,10 @@ extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
 extern ktime_t ktime_get_raw(void);
 extern u32 ktime_get_resolution_ns(void);
 
+/* Clock BOOTTIME compatibility wrappers */
+static inline ktime_t ktime_get_boottime(void) { return ktime_get(); }
+static inline u64 ktime_get_boot_ns(void) { return ktime_get(); }
+
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
  */
@@ -65,17 +74,6 @@ static inline ktime_t ktime_get_real(void)
 }
 
 /**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
- *
- * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
- * time spent in suspend.
- */
-static inline ktime_t ktime_get_boottime(void)
-{
-	return ktime_get_with_offset(TK_OFFS_BOOT);
-}
-
-/**
  * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
  */
 static inline ktime_t ktime_get_clocktai(void)
@@ -101,11 +99,6 @@ static inline u64 ktime_get_real_ns(void)
 	return ktime_to_ns(ktime_get_real());
 }
 
-static inline u64 ktime_get_boot_ns(void)
-{
-	return ktime_to_ns(ktime_get_boottime());
-}
-
 static inline u64 ktime_get_tai_ns(void)
 {
 	return ktime_to_ns(ktime_get_clocktai());
@@ -118,17 +111,11 @@ static inline u64 ktime_get_raw_ns(void)
 
 extern u64 ktime_get_mono_fast_ns(void);
 extern u64 ktime_get_raw_fast_ns(void);
-extern u64 ktime_get_boot_fast_ns(void);
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
  * timespec64 interfaces utilizing the ktime based ones
  */
-static inline void get_monotonic_boottime64(struct timespec64 *ts)
-{
-	*ts = ktime_to_timespec64(ktime_get_boottime());
-}
-
 static inline void timekeeping_clocktai64(struct timespec64 *ts)
 {
 	*ts = ktime_to_timespec64(ktime_get_clocktai());
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index c3ac5ec86519..965c650a5273 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -71,6 +71,12 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
 		 { BTRFS_FILE_EXTENT_REG,	"REG"	 },		\
 		 { BTRFS_FILE_EXTENT_PREALLOC,	"PREALLOC"})
 
+#define show_qgroup_rsv_type(type)					\
+	__print_symbolic(type,						\
+		{ BTRFS_QGROUP_RSV_DATA,	  "DATA"	},	\
+		{ BTRFS_QGROUP_RSV_META_PERTRANS, "META_PERTRANS" },	\
+		{ BTRFS_QGROUP_RSV_META_PREALLOC, "META_PREALLOC" })
+
 #define BTRFS_GROUP_FLAGS	\
 	{ BTRFS_BLOCK_GROUP_DATA,	"DATA"},	\
 	{ BTRFS_BLOCK_GROUP_SYSTEM,	"SYSTEM"},	\
@@ -248,6 +254,41 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		  __entry->refs, __entry->compress_type)
 );
 
+TRACE_EVENT(btrfs_handle_em_exist,
+
+	TP_PROTO(const struct extent_map *existing, const struct extent_map *map, u64 start, u64 len),
+
+	TP_ARGS(existing, map, start, len),
+
+	TP_STRUCT__entry(
+		__field(	u64,  e_start		)
+		__field(	u64,  e_len		)
+		__field(	u64,  map_start		)
+		__field(	u64,  map_len		)
+		__field(	u64,  start		)
+		__field(	u64,  len		)
+	),
+
+	TP_fast_assign(
+		__entry->e_start	= existing->start;
+		__entry->e_len		= existing->len;
+		__entry->map_start	= map->start;
+		__entry->map_len	= map->len;
+		__entry->start		= start;
+		__entry->len		= len;
+	),
+
+	TP_printk("start=%llu len=%llu "
+		  "existing(start=%llu len=%llu) "
+		  "em(start=%llu len=%llu)",
+		  (unsigned long long)__entry->start,
+		  (unsigned long long)__entry->len,
+		  (unsigned long long)__entry->e_start,
+		  (unsigned long long)__entry->e_len,
+		  (unsigned long long)__entry->map_start,
+		  (unsigned long long)__entry->map_len)
+);
+
 /* file extent item */
 DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
 
@@ -1598,28 +1639,52 @@ TRACE_EVENT(qgroup_update_counters,
 TRACE_EVENT(qgroup_update_reserve,
 
 	TP_PROTO(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup,
-		 s64 diff),
+		 s64 diff, int type),
 
-	TP_ARGS(fs_info, qgroup, diff),
+	TP_ARGS(fs_info, qgroup, diff, type),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,	qgid			)
 		__field(	u64,	cur_reserved		)
 		__field(	s64,	diff			)
+		__field(	int,	type			)
 	),
 
 	TP_fast_assign_btrfs(fs_info,
 		__entry->qgid		= qgroup->qgroupid;
-		__entry->cur_reserved	= qgroup->reserved;
+		__entry->cur_reserved	= qgroup->rsv.values[type];
 		__entry->diff		= diff;
 	),
 
-	TP_printk_btrfs("qgid=%llu cur_reserved=%llu diff=%lld",
-		__entry->qgid, __entry->cur_reserved, __entry->diff)
+	TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
+		__entry->qgid, show_qgroup_rsv_type(__entry->type),
+		__entry->cur_reserved, __entry->diff)
 );
 
 TRACE_EVENT(qgroup_meta_reserve,
 
+	TP_PROTO(struct btrfs_root *root, s64 diff, int type),
+
+	TP_ARGS(root, diff, type),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	refroot			)
+		__field(	s64,	diff			)
+		__field(	int,	type			)
+	),
+
+	TP_fast_assign_btrfs(root->fs_info,
+		__entry->refroot	= root->objectid;
+		__entry->diff		= diff;
+	),
+
+	TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
+		show_root_type(__entry->refroot),
+		show_qgroup_rsv_type(__entry->type), __entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_convert,
+
 	TP_PROTO(struct btrfs_root *root, s64 diff),
 
 	TP_ARGS(root, diff),
@@ -1627,6 +1692,7 @@ TRACE_EVENT(qgroup_meta_reserve,
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,	refroot			)
 		__field(	s64,	diff			)
+		__field(	int,	type			)
 	),
 
 	TP_fast_assign_btrfs(root->fs_info,
@@ -1634,8 +1700,36 @@ TRACE_EVENT(qgroup_meta_reserve,
 		__entry->diff		= diff;
 	),
 
-	TP_printk_btrfs("refroot=%llu(%s) diff=%lld",
-		show_root_type(__entry->refroot), __entry->diff)
+	TP_printk_btrfs("refroot=%llu(%s) type=%s->%s diff=%lld",
+		show_root_type(__entry->refroot),
+		show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PREALLOC),
+		show_qgroup_rsv_type(BTRFS_QGROUP_RSV_META_PERTRANS),
+		__entry->diff)
+);
+
+TRACE_EVENT(qgroup_meta_free_all_pertrans,
+
+	TP_PROTO(struct btrfs_root *root),
+
+	TP_ARGS(root),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	refroot			)
+		__field(	s64,	diff			)
+		__field(	int,	type			)
+	),
+
+	TP_fast_assign_btrfs(root->fs_info,
+		__entry->refroot	= root->objectid;
+		spin_lock(&root->qgroup_meta_rsv_lock);
+		__entry->diff		= -(s64)root->qgroup_meta_rsv_pertrans;
+		spin_unlock(&root->qgroup_meta_rsv_lock);
+		__entry->type		= BTRFS_QGROUP_RSV_META_PERTRANS;
+	),
+
+	TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
+		show_root_type(__entry->refroot),
+		show_qgroup_rsv_type(__entry->type), __entry->diff)
 );
 
 DECLARE_EVENT_CLASS(btrfs__prelim_ref,
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 4d0e3af4e561..0e31eb136c57 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2585,6 +2585,49 @@ DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key);
 DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key);
 DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping);
 
+TRACE_EVENT(ext4_shutdown,
+	TP_PROTO(struct super_block *sb, unsigned long flags),
+
+	TP_ARGS(sb, flags),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(     unsigned,	flags			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev %d,%d flags %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->flags)
+);
+
+TRACE_EVENT(ext4_error,
+	TP_PROTO(struct super_block *sb, const char *function,
+		 unsigned int line),
+
+	TP_ARGS(sb, function, line),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field( const char *,	function		)
+		__field(     unsigned,	line			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= sb->s_dev;
+		__entry->function = function;
+		__entry->line	= line;
+	),
+
+	TP_printk("dev %d,%d function %s line %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->function, __entry->line)
+);
+
 #endif /* _TRACE_EXT4_H */
 
 /* This part must be outside protection */
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 73ae06b67051..6088bca89917 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -207,7 +207,8 @@ typedef struct siginfo {
 #define __FPE_DECERR	11	/* packed decimal error */
 #define __FPE_INVASC	12	/* invalid ASCII digit */
 #define __FPE_INVDEC	13	/* invalid decimal digit */
-#define NSIGFPE		13
+#define FPE_FLTUNK	14	/* undiagnosed floating-point exception */
+#define NSIGFPE		14
 
 /*
  * SIGSEGV si_codes
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 53f8dd84beb5..16a296612ba4 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -43,6 +43,18 @@ struct itimerval {
 };
 
 /*
+ * legacy timeval structure, only embedded in structures that
+ * traditionally used 'timeval' to pass time intervals (not absolute
+ * times). Do not add new users. If user space fails to compile
+ * here, this is probably because it is not y2038 safe and needs to
+ * be changed to use another interface.
+ */
+struct __kernel_old_timeval {
+	__kernel_long_t tv_sec;
+	__kernel_long_t tv_usec;
+};
+
+/*
  * The IDs of the various system clocks (for POSIX.1b interval timers):
  */
 #define CLOCK_REALTIME			0
@@ -61,6 +73,7 @@ struct itimerval {
  */
 #define CLOCK_SGI_CYCLE			10
 #define CLOCK_TAI			11
+#define CLOCK_MONOTONIC_ACTIVE		12
 
 #define MAX_CLOCKS			16
 #define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 6fc87ccda1d7..c6766f326072 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -132,3 +132,9 @@ config GENERIC_IRQ_DEBUGFS
 	  If you don't know what to do here, say N.
 
 endmenu
+
+config GENERIC_IRQ_MULTI_HANDLER
+	depends on !MULTI_IRQ_HANDLER
+	bool
+	help
+	  Allow to specify the low level IRQ handler at run time.
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 8c82ea26e837..16cbf6beb276 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/autoprobe.c
- *
  * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
  *
  * This file contains the interrupt probing code and driver APIs.
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c69357a43849..a2b3d9de999c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1,13 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/chip.c
- *
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
  *
- * This file contains the core interrupt handling code, for irq-chip
- * based architectures.
- *
- * Detailed information is available in Documentation/core-api/genericirq.rst
+ * This file contains the core interrupt handling code, for irq-chip based
+ * architectures. Detailed information is available in
+ * Documentation/core-api/genericirq.rst
  */
 
 #include <linux/irq.h>
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 9eb09aef0313..5b1072e394b2 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic cpu hotunplug interrupt migration code copied from the
  * arch/arm implementation
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index acfaaef8672a..4dadeb3d6666 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -1,8 +1,6 @@
-/*
- * Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
- *
- * This file is licensed under the GPL V2.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
+
 #include <linux/irqdomain.h>
 #include <linux/irq.h>
 #include <linux/uaccess.h>
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 194c506d9d20..6a682c229e10 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 326a67f2410b..0b0cdf206dc4 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 508c03dfef25..e2999a070a99 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Library implementing the most common irq chip callback functions
  *
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 79f987b942b8..38554bc35375 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/handle.c
- *
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
  *
- * This file contains the core interrupt handling code.
- *
- * Detailed information is available in Documentation/core-api/genericirq.rst
+ * This file contains the core interrupt handling code. Detailed
+ * information is available in Documentation/core-api/genericirq.rst
  *
  */
 
@@ -20,6 +18,10 @@
 
 #include "internals.h"
 
+#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
+void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
+#endif
+
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
  * @desc:      description of the interrupt
@@ -207,3 +209,14 @@ irqreturn_t handle_irq_event(struct irq_desc *desc)
 	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
 	return ret;
 }
+
+#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
+int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+	if (handle_arch_irq)
+		return -EBUSY;
+
+	handle_arch_irq = handle_irq;
+	return 0;
+}
+#endif
diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c
index 259a22aa9934..8b778e37dc6d 100644
--- a/kernel/irq/ipi.c
+++ b/kernel/irq/ipi.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/ipi.c
- *
  * Copyright (C) 2015 Imagination Technologies Ltd
  * Author: Qais Yousef <qais.yousef@imgtec.com>
  *
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 24caabf1a0f7..fc4f361a86bb 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
  *
@@ -7,6 +8,7 @@
  * option) any later version.
  */
 
+#include <linux/slab.h>
 #include <linux/irq_sim.h>
 #include <linux/irq.h>
 
@@ -49,7 +51,8 @@ static void irq_sim_handle_irq(struct irq_work *work)
  * @sim:        The interrupt simulator object to initialize.
  * @num_irqs:   Number of interrupts to allocate
  *
- * Returns 0 on success and a negative error number on failure.
+ * On success: return the base of the allocated interrupt range.
+ * On failure: a negative errno.
  */
 int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs)
 {
@@ -78,7 +81,7 @@ int irq_sim_init(struct irq_sim *sim, unsigned int num_irqs)
 	init_irq_work(&sim->work_ctx.work, irq_sim_handle_irq);
 	sim->irq_count = num_irqs;
 
-	return 0;
+	return sim->irq_base;
 }
 EXPORT_SYMBOL_GPL(irq_sim_init);
 
@@ -110,7 +113,8 @@ static void devm_irq_sim_release(struct device *dev, void *res)
  * @sim:        The interrupt simulator object to initialize.
  * @num_irqs:   Number of interrupts to allocate
  *
- * Returns 0 on success and a negative error number on failure.
+ * On success: return the base of the allocated interrupt range.
+ * On failure: a negative errno.
  */
 int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
 		      unsigned int num_irqs)
@@ -123,7 +127,7 @@ int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
 		return -ENOMEM;
 
 	rv = irq_sim_init(sim, num_irqs);
-	if (rv) {
+	if (rv < 0) {
 		devres_free(dr);
 		return rv;
 	}
@@ -131,7 +135,7 @@ int devm_irq_sim_init(struct device *dev, struct irq_sim *sim,
 	dr->sim = sim;
 	devres_add(dev, dr);
 
-	return 0;
+	return rv;
 }
 EXPORT_SYMBOL_GPL(devm_irq_sim_init);
 
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 49b54e9979cc..afc7f902d74a 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -1,10 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
  *
- * This file contains the interrupt descriptor management code
- *
- * Detailed information is available in Documentation/core-api/genericirq.rst
+ * This file contains the interrupt descriptor management code. Detailed
+ * information is available in Documentation/core-api/genericirq.rst
  *
  */
 #include <linux/irq.h>
@@ -210,6 +210,22 @@ static ssize_t type_show(struct kobject *kobj,
 }
 IRQ_ATTR_RO(type);
 
+static ssize_t wakeup_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	ret = sprintf(buf, "%s\n",
+		      irqd_is_wakeup_set(&desc->irq_data) ? "enabled" : "disabled");
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+
+}
+IRQ_ATTR_RO(wakeup);
+
 static ssize_t name_show(struct kobject *kobj,
 			 struct kobj_attribute *attr, char *buf)
 {
@@ -253,6 +269,7 @@ static struct attribute *irq_attrs[] = {
 	&chip_name_attr.attr,
 	&hwirq_attr.attr,
 	&type_attr.attr,
+	&wakeup_attr.attr,
 	&name_attr.attr,
 	&actions_attr.attr,
 	NULL
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 82b8b18ee1eb..5d9fc01b60a6 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
 #define pr_fmt(fmt)  "irq: " fmt
 
 #include <linux/acpi.h>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0f922729bab9..e3336d904f64 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/manage.c
- *
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006 Thomas Gleixner
  *
@@ -855,10 +854,14 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
 	 * This code is triggered unconditionally. Check the affinity
 	 * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
 	 */
-	if (cpumask_available(desc->irq_common_data.affinity))
-		cpumask_copy(mask, desc->irq_common_data.affinity);
-	else
+	if (cpumask_available(desc->irq_common_data.affinity)) {
+		const struct cpumask *m;
+
+		m = irq_data_get_effective_affinity_mask(&desc->irq_data);
+		cpumask_copy(mask, m);
+	} else {
 		valid = false;
+	}
 	raw_spin_unlock_irq(&desc->lock);
 
 	if (valid)
@@ -1519,9 +1522,9 @@ EXPORT_SYMBOL_GPL(setup_irq);
  * Internal function to unregister an irqaction - used to free
  * regular and special interrupts that are part of the architecture.
  */
-static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
+static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned irq = desc->irq_data.irq;
 	struct irqaction *action, **action_ptr;
 	unsigned long flags;
 
@@ -1651,7 +1654,7 @@ void remove_irq(unsigned int irq, struct irqaction *act)
 	struct irq_desc *desc = irq_to_desc(irq);
 
 	if (desc && !WARN_ON(irq_settings_is_per_cpu_devid(desc)))
-		__free_irq(irq, act->dev_id);
+		__free_irq(desc, act->dev_id);
 }
 EXPORT_SYMBOL_GPL(remove_irq);
 
@@ -1685,7 +1688,7 @@ const void *free_irq(unsigned int irq, void *dev_id)
 		desc->affinity_notify = NULL;
 #endif
 
-	action = __free_irq(irq, dev_id);
+	action = __free_irq(desc, dev_id);
 
 	if (!action)
 		return NULL;
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 4c5770407031..5092494bf261 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -1,8 +1,6 @@
-/*
- * Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de>
- *
- * SPDX-License-Identifier: GPL-2.0
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Thomas Gleixner <tglx@linutronix.de>
+
 #include <linux/spinlock.h>
 #include <linux/seq_file.h>
 #include <linux/bitmap.h>
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2f3c4f5382cc..2a8571f72b17 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/msi.c
- *
  * Copyright (C) 2014 Intel Corp.
  * Author: Jiang Liu <jiang.liu@linux.intel.com>
  *
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 6bd9b58429cc..d6961d3c6f9e 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/pm.c
- *
  * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
  *
  * This file contains power management functions related to interrupts.
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index e8f374971e37..7cb091d81d91 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/proc.c
- *
  * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
  *
  * This file contains the /proc/irq/ handling code.
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 1d08f45135c2..95414ad3506a 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/resend.c
- *
  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
  * Copyright (C) 2005-2006, Thomas Gleixner
  *
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 6cdecc6f4c53..d867d6ddafdd 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * linux/kernel/irq/spurious.c
- *
  * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
  *
  * This file contains spurious interrupt handling.
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index e0923fa4927a..1e4cb63a5c82 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -1,13 +1,6 @@
-/*
- * linux/kernel/irq/timings.c
- *
- * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+
 #include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
diff --git a/kernel/signal.c b/kernel/signal.c
index f04466655238..47491aa3e790 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2844,10 +2844,6 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
 		if ((sig == SIGFPE) && (si_code == FPE_FIXME))
 			layout = SIL_FAULT;
 #endif
-#ifdef BUS_FIXME
-		if ((sig == SIGBUS) && (si_code == BUS_FIXME))
-			layout = SIL_FAULT;
-#endif
 	}
 	return layout;
 }
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 24d243ef8e71..177de3640c78 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -460,40 +460,46 @@ struct tasklet_head {
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 
-void __tasklet_schedule(struct tasklet_struct *t)
+static void __tasklet_schedule_common(struct tasklet_struct *t,
+				      struct tasklet_head __percpu *headp,
+				      unsigned int softirq_nr)
 {
+	struct tasklet_head *head;
 	unsigned long flags;
 
 	local_irq_save(flags);
+	head = this_cpu_ptr(headp);
 	t->next = NULL;
-	*__this_cpu_read(tasklet_vec.tail) = t;
-	__this_cpu_write(tasklet_vec.tail, &(t->next));
-	raise_softirq_irqoff(TASKLET_SOFTIRQ);
+	*head->tail = t;
+	head->tail = &(t->next);
+	raise_softirq_irqoff(softirq_nr);
 	local_irq_restore(flags);
 }
+
+void __tasklet_schedule(struct tasklet_struct *t)
+{
+	__tasklet_schedule_common(t, &tasklet_vec,
+				  TASKLET_SOFTIRQ);
+}
 EXPORT_SYMBOL(__tasklet_schedule);
 
 void __tasklet_hi_schedule(struct tasklet_struct *t)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	t->next = NULL;
-	*__this_cpu_read(tasklet_hi_vec.tail) = t;
-	__this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
-	raise_softirq_irqoff(HI_SOFTIRQ);
-	local_irq_restore(flags);
+	__tasklet_schedule_common(t, &tasklet_hi_vec,
+				  HI_SOFTIRQ);
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule);
 
-static __latent_entropy void tasklet_action(struct softirq_action *a)
+static void tasklet_action_common(struct softirq_action *a,
+				  struct tasklet_head *tl_head,
+				  unsigned int softirq_nr)
 {
 	struct tasklet_struct *list;
 
 	local_irq_disable();
-	list = __this_cpu_read(tasklet_vec.head);
-	__this_cpu_write(tasklet_vec.head, NULL);
-	__this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head));
+	list = tl_head->head;
+	tl_head->head = NULL;
+	tl_head->tail = &tl_head->head;
 	local_irq_enable();
 
 	while (list) {
@@ -515,47 +521,21 @@ static __latent_entropy void tasklet_action(struct softirq_action *a)
 
 		local_irq_disable();
 		t->next = NULL;
-		*__this_cpu_read(tasklet_vec.tail) = t;
-		__this_cpu_write(tasklet_vec.tail, &(t->next));
-		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
+		*tl_head->tail = t;
+		tl_head->tail = &t->next;
+		__raise_softirq_irqoff(softirq_nr);
 		local_irq_enable();
 	}
 }
 
-static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
+static __latent_entropy void tasklet_action(struct softirq_action *a)
 {
-	struct tasklet_struct *list;
-
-	local_irq_disable();
-	list = __this_cpu_read(tasklet_hi_vec.head);
-	__this_cpu_write(tasklet_hi_vec.head, NULL);
-	__this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head));
-	local_irq_enable();
-
-	while (list) {
-		struct tasklet_struct *t = list;
-
-		list = list->next;
-
-		if (tasklet_trylock(t)) {
-			if (!atomic_read(&t->count)) {
-				if (!test_and_clear_bit(TASKLET_STATE_SCHED,
-							&t->state))
-					BUG();
-				t->func(t->data);
-				tasklet_unlock(t);
-				continue;
-			}
-			tasklet_unlock(t);
-		}
+	tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
+}
 
-		local_irq_disable();
-		t->next = NULL;
-		*__this_cpu_read(tasklet_hi_vec.tail) = t;
-		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
-		__raise_softirq_irqoff(HI_SOFTIRQ);
-		local_irq_enable();
-	}
+static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
+{
+	tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
 }
 
 void tasklet_init(struct tasklet_struct *t,
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index ec09ce9a6012..639321bf2e39 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -326,6 +326,17 @@ static int alarmtimer_resume(struct device *dev)
 }
 #endif
 
+static void
+__alarm_init(struct alarm *alarm, enum alarmtimer_type type,
+	     enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
+{
+	timerqueue_init(&alarm->node);
+	alarm->timer.function = alarmtimer_fired;
+	alarm->function = function;
+	alarm->type = type;
+	alarm->state = ALARMTIMER_STATE_INACTIVE;
+}
+
 /**
  * alarm_init - Initialize an alarm structure
  * @alarm: ptr to alarm to be initialized
@@ -335,13 +346,9 @@ static int alarmtimer_resume(struct device *dev)
 void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
 		enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
 {
-	timerqueue_init(&alarm->node);
 	hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
-			HRTIMER_MODE_ABS);
-	alarm->timer.function = alarmtimer_fired;
-	alarm->function = function;
-	alarm->type = type;
-	alarm->state = ALARMTIMER_STATE_INACTIVE;
+		     HRTIMER_MODE_ABS);
+	__alarm_init(alarm, type, function);
 }
 EXPORT_SYMBOL_GPL(alarm_init);
 
@@ -719,6 +726,8 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 
 	__set_current_state(TASK_RUNNING);
 
+	destroy_hrtimer_on_stack(&alarm->timer);
+
 	if (!alarm->data)
 		return 0;
 
@@ -740,6 +749,15 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
 	return -ERESTART_RESTARTBLOCK;
 }
 
+static void
+alarm_init_on_stack(struct alarm *alarm, enum alarmtimer_type type,
+		    enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
+{
+	hrtimer_init_on_stack(&alarm->timer, alarm_bases[type].base_clockid,
+			      HRTIMER_MODE_ABS);
+	__alarm_init(alarm, type, function);
+}
+
 /**
  * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep
  * @restart: ptr to restart block
@@ -752,7 +770,7 @@ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart)
 	ktime_t exp = restart->nanosleep.expires;
 	struct alarm alarm;
 
-	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
+	alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup);
 
 	return alarmtimer_do_nsleep(&alarm, exp, type);
 }
@@ -784,7 +802,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
 	if (!capable(CAP_WAKE_ALARM))
 		return -EPERM;
 
-	alarm_init(&alarm, type, alarmtimer_nsleep_wakeup);
+	alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup);
 
 	exp = timespec64_to_ktime(*tsreq);
 	/* Convert (if necessary) to absolute time */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 65f9e3f24dde..0e974cface0b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -594,6 +594,9 @@ static void __clocksource_select(bool skipcur)
 	if (!best)
 		return;
 
+	if (!strlen(override_name))
+		goto found;
+
 	/* Check for the override clocksource. */
 	list_for_each_entry(cs, &clocksource_list, list) {
 		if (skipcur && cs == curr_clocksource)
@@ -625,6 +628,7 @@ static void __clocksource_select(bool skipcur)
 		break;
 	}
 
+found:
 	if (curr_clocksource != best && !timekeeping_notify(best)) {
 		pr_info("Switched to clocksource %s\n", best->name);
 		curr_clocksource = best;
@@ -853,16 +857,16 @@ EXPORT_SYMBOL(clocksource_unregister);
 
 #ifdef CONFIG_SYSFS
 /**
- * sysfs_show_current_clocksources - sysfs interface for current clocksource
+ * current_clocksource_show - sysfs interface for current clocksource
  * @dev:	unused
  * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing current clocksource.
  */
-static ssize_t
-sysfs_show_current_clocksources(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t current_clocksource_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
 {
 	ssize_t count = 0;
 
@@ -891,7 +895,7 @@ ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
 }
 
 /**
- * sysfs_override_clocksource - interface for manually overriding clocksource
+ * current_clocksource_store - interface for manually overriding clocksource
  * @dev:	unused
  * @attr:	unused
  * @buf:	name of override clocksource
@@ -900,9 +904,9 @@ ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
  * Takes input from sysfs interface for manually overriding the default
  * clocksource selection.
  */
-static ssize_t sysfs_override_clocksource(struct device *dev,
-					  struct device_attribute *attr,
-					  const char *buf, size_t count)
+static ssize_t current_clocksource_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
 {
 	ssize_t ret;
 
@@ -916,9 +920,10 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
 
 	return ret;
 }
+static DEVICE_ATTR_RW(current_clocksource);
 
 /**
- * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
+ * unbind_clocksource_store - interface for manually unbinding clocksource
  * @dev:	unused
  * @attr:	unused
  * @buf:	unused
@@ -926,7 +931,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
  *
  * Takes input from sysfs interface for manually unbinding a clocksource.
  */
-static ssize_t sysfs_unbind_clocksource(struct device *dev,
+static ssize_t unbind_clocksource_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t count)
 {
@@ -950,19 +955,19 @@ static ssize_t sysfs_unbind_clocksource(struct device *dev,
 
 	return ret ? ret : count;
 }
+static DEVICE_ATTR_WO(unbind_clocksource);
 
 /**
- * sysfs_show_available_clocksources - sysfs interface for listing clocksource
+ * available_clocksource_show - sysfs interface for listing clocksource
  * @dev:	unused
  * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing registered clocksources
  */
-static ssize_t
-sysfs_show_available_clocksources(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
+static ssize_t available_clocksource_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
 {
 	struct clocksource *src;
 	ssize_t count = 0;
@@ -986,17 +991,15 @@ sysfs_show_available_clocksources(struct device *dev,
 
 	return count;
 }
+static DEVICE_ATTR_RO(available_clocksource);
 
-/*
- * Sysfs setup bits:
- */
-static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
-		   sysfs_override_clocksource);
-
-static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
-
-static DEVICE_ATTR(available_clocksource, 0444,
-		   sysfs_show_available_clocksources, NULL);
+static struct attribute *clocksource_attrs[] = {
+	&dev_attr_current_clocksource.attr,
+	&dev_attr_unbind_clocksource.attr,
+	&dev_attr_available_clocksource.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(clocksource);
 
 static struct bus_type clocksource_subsys = {
 	.name = "clocksource",
@@ -1006,6 +1009,7 @@ static struct bus_type clocksource_subsys = {
 static struct device device_clocksource = {
 	.id	= 0,
 	.bus	= &clocksource_subsys,
+	.groups	= clocksource_groups,
 };
 
 static int __init init_clocksource_sysfs(void)
@@ -1014,17 +1018,7 @@ static int __init init_clocksource_sysfs(void)
 
 	if (!error)
 		error = device_register(&device_clocksource);
-	if (!error)
-		error = device_create_file(
-				&device_clocksource,
-				&dev_attr_current_clocksource);
-	if (!error)
-		error = device_create_file(&device_clocksource,
-					   &dev_attr_unbind_clocksource);
-	if (!error)
-		error = device_create_file(
-				&device_clocksource,
-				&dev_attr_available_clocksource);
+
 	return error;
 }
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 23788100e214..9b082ce86325 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -91,11 +91,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 			.get_time = &ktime_get_real,
 		},
 		{
-			.index = HRTIMER_BASE_BOOTTIME,
-			.clockid = CLOCK_BOOTTIME,
-			.get_time = &ktime_get_boottime,
-		},
-		{
 			.index = HRTIMER_BASE_TAI,
 			.clockid = CLOCK_TAI,
 			.get_time = &ktime_get_clocktai,
@@ -111,11 +106,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 			.get_time = &ktime_get_real,
 		},
 		{
-			.index = HRTIMER_BASE_BOOTTIME_SOFT,
-			.clockid = CLOCK_BOOTTIME,
-			.get_time = &ktime_get_boottime,
-		},
-		{
 			.index = HRTIMER_BASE_TAI_SOFT,
 			.clockid = CLOCK_TAI,
 			.get_time = &ktime_get_clocktai,
@@ -129,7 +119,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 
 	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
 	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
-	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
+	[CLOCK_BOOTTIME]	= HRTIMER_BASE_MONOTONIC,
 	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
 };
 
@@ -565,14 +555,12 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
-	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
 	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
 	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
-					    offs_real, offs_boot, offs_tai);
+						   offs_real, offs_tai);
 
 	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
-	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
 	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
 
 	return now;
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index b258bee13b02..6259dbc0191a 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -73,6 +73,8 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
 	case CLOCK_BOOTTIME:
 		get_monotonic_boottime64(tp);
 		break;
+	case CLOCK_MONOTONIC_ACTIVE:
+		ktime_get_active_ts64(tp);
 	default:
 		return -EINVAL;
 	}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 10b7186d0638..b6899b5060bd 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -252,15 +252,16 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
 	return 0;
 }
 
-static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
 {
-	get_monotonic_boottime64(tp);
+	timekeeping_clocktai64(tp);
 	return 0;
 }
 
-static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_monotonic_active(clockid_t which_clock,
+				      struct timespec64 *tp)
 {
-	timekeeping_clocktai64(tp);
+	ktime_get_active_ts64(tp);
 	return 0;
 }
 
@@ -1316,19 +1317,9 @@ static const struct k_clock clock_tai = {
 	.timer_arm		= common_hrtimer_arm,
 };
 
-static const struct k_clock clock_boottime = {
+static const struct k_clock clock_monotonic_active = {
 	.clock_getres		= posix_get_hrtimer_res,
-	.clock_get		= posix_get_boottime,
-	.nsleep			= common_nsleep,
-	.timer_create		= common_timer_create,
-	.timer_set		= common_timer_set,
-	.timer_get		= common_timer_get,
-	.timer_del		= common_timer_del,
-	.timer_rearm		= common_hrtimer_rearm,
-	.timer_forward		= common_hrtimer_forward,
-	.timer_remaining	= common_hrtimer_remaining,
-	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
-	.timer_arm		= common_hrtimer_arm,
+	.clock_get		= posix_get_monotonic_active,
 };
 
 static const struct k_clock * const posix_clocks[] = {
@@ -1339,10 +1330,11 @@ static const struct k_clock * const posix_clocks[] = {
 	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
 	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
 	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
-	[CLOCK_BOOTTIME]		= &clock_boottime,
+	[CLOCK_BOOTTIME]		= &clock_monotonic,
 	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
 	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
 	[CLOCK_TAI]			= &clock_tai,
+	[CLOCK_MONOTONIC_ACTIVE]	= &clock_monotonic_active,
 };
 
 static const struct k_clock *clockid_to_kclock(const clockid_t id)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 49edc1c4f3e6..099572ca4a8f 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -419,6 +419,19 @@ void tick_suspend_local(void)
 	clockevents_shutdown(td->evtdev);
 }
 
+static void tick_forward_next_period(void)
+{
+	ktime_t delta, now = ktime_get();
+	u64 n;
+
+	delta = ktime_sub(now, tick_next_period);
+	n = ktime_divns(delta, tick_period);
+	tick_next_period += n * tick_period;
+	if (tick_next_period < now)
+		tick_next_period += tick_period;
+	tick_sched_forward_next_period();
+}
+
 /**
  * tick_resume_local - Resume the local tick device
  *
@@ -431,6 +444,8 @@ void tick_resume_local(void)
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 	bool broadcast = tick_resume_check_broadcast();
 
+	tick_forward_next_period();
+
 	clockevents_tick_resume(td->evtdev);
 	if (!broadcast) {
 		if (td->mode == TICKDEV_MODE_PERIODIC)
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e277284c2831..21efab7485ca 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -141,6 +141,12 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
 #endif /* !(BROADCAST && ONESHOT) */
 
+#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
+extern void tick_sched_forward_next_period(void);
+#else
+static inline void tick_sched_forward_next_period(void) { }
+#endif
+
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
 extern void tick_nohz_init(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5d4a0342f934..f3ab08caa2c3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -52,6 +52,15 @@ struct tick_sched *tick_get_tick_sched(int cpu)
 static ktime_t last_jiffies_update;
 
 /*
+ * Called after resume. Make sure that jiffies are not fast forwarded due to
+ * clock monotonic being forwarded by the suspended time.
+ */
+void tick_sched_forward_next_period(void)
+{
+	last_jiffies_update = tick_next_period;
+}
+
+/*
  * Must be called with interrupts disabled !
  */
 static void tick_do_update_jiffies64(ktime_t now)
diff --git a/kernel/time/time.c b/kernel/time/time.c
index bd4e6c7dd689..3044d48ebe56 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -488,6 +488,18 @@ struct timeval ns_to_timeval(const s64 nsec)
 }
 EXPORT_SYMBOL(ns_to_timeval);
 
+struct __kernel_old_timeval ns_to_kernel_old_timeval(const s64 nsec)
+{
+	struct timespec64 ts = ns_to_timespec64(nsec);
+	struct __kernel_old_timeval tv;
+
+	tv.tv_sec = ts.tv_sec;
+	tv.tv_usec = (suseconds_t)ts.tv_nsec / 1000;
+
+	return tv;
+}
+EXPORT_SYMBOL(ns_to_kernel_old_timeval);
+
 /**
  * set_normalized_timespec - set timespec sec and nsec parts and normalize
  *
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cd03317e7b57..ca90219a1e73 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -138,7 +138,12 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 
 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 {
-	tk->offs_boot = ktime_add(tk->offs_boot, delta);
+	/* Update both bases so mono and raw stay coupled. */
+	tk->tkr_mono.base += delta;
+	tk->tkr_raw.base += delta;
+
+	/* Accumulate time spent in suspend */
+	tk->time_suspended += delta;
 }
 
 /*
@@ -332,6 +337,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	tk->tkr_mono.mult = clock->mult;
 	tk->tkr_raw.mult = clock->mult;
 	tk->ntp_err_mult = 0;
+	tk->skip_second_overflow = 0;
 }
 
 /* Timekeeper helper functions. */
@@ -467,36 +473,6 @@ u64 ktime_get_raw_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
 
-/**
- * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
- *
- * To keep it NMI safe since we're accessing from tracing, we're not using a
- * separate timekeeper with updates to monotonic clock and boot offset
- * protected with seqlocks. This has the following minor side effects:
- *
- * (1) Its possible that a timestamp be taken after the boot offset is updated
- * but before the timekeeper is updated. If this happens, the new boot offset
- * is added to the old timekeeping making the clock appear to update slightly
- * earlier:
- *    CPU 0                                        CPU 1
- *    timekeeping_inject_sleeptime64()
- *    __timekeeping_inject_sleeptime(tk, delta);
- *                                                 timestamp();
- *    timekeeping_update(tk, TK_CLEAR_NTP...);
- *
- * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
- * partially updated.  Since the tk->offs_boot update is a rare event, this
- * should be a rare occurrence which postprocessing should be able to handle.
- */
-u64 notrace ktime_get_boot_fast_ns(void)
-{
-	struct timekeeper *tk = &tk_core.timekeeper;
-
-	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
-}
-EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
-
-
 /*
  * See comment for __ktime_get_fast_ns() vs. timestamp ordering
  */
@@ -788,7 +764,6 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
 
 static ktime_t *offsets[TK_OFFS_MAX] = {
 	[TK_OFFS_REAL]	= &tk_core.timekeeper.offs_real,
-	[TK_OFFS_BOOT]	= &tk_core.timekeeper.offs_boot,
 	[TK_OFFS_TAI]	= &tk_core.timekeeper.offs_tai,
 };
 
@@ -886,6 +861,39 @@ void ktime_get_ts64(struct timespec64 *ts)
 EXPORT_SYMBOL_GPL(ktime_get_ts64);
 
 /**
+ * ktime_get_active_ts64 - Get the active non-suspended monotonic clock
+ * @ts:		pointer to timespec variable
+ *
+ * The function calculates the monotonic clock from the realtime clock and
+ * the wall_to_monotonic offset, subtracts the accumulated suspend time and
+ * stores the result in normalized timespec64 format in the variable
+ * pointed to by @ts.
+ */
+void ktime_get_active_ts64(struct timespec64 *ts)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+	struct timespec64 tomono, tsusp;
+	u64 nsec, nssusp;
+	unsigned int seq;
+
+	WARN_ON(timekeeping_suspended);
+
+	do {
+		seq = read_seqcount_begin(&tk_core.seq);
+		ts->tv_sec = tk->xtime_sec;
+		nsec = timekeeping_get_ns(&tk->tkr_mono);
+		tomono = tk->wall_to_monotonic;
+		nssusp = tk->time_suspended;
+	} while (read_seqcount_retry(&tk_core.seq, seq));
+
+	ts->tv_sec += tomono.tv_sec;
+	ts->tv_nsec = 0;
+	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
+	tsusp = ns_to_timespec64(nssusp);
+	*ts = timespec64_sub(*ts, tsusp);
+}
+
+/**
  * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
  *
  * Returns the seconds portion of CLOCK_MONOTONIC with a single non
@@ -1585,7 +1593,6 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
 		return;
 	}
 	tk_xtime_add(tk, delta);
-	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
 	tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
 	tk_debug_account_sleep_time(delta);
 }
@@ -1799,20 +1806,19 @@ device_initcall(timekeeping_init_ops);
  */
 static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
 							 s64 offset,
-							 bool negative,
-							 int adj_scale)
+							 s32 mult_adj)
 {
 	s64 interval = tk->cycle_interval;
-	s32 mult_adj = 1;
 
-	if (negative) {
-		mult_adj = -mult_adj;
+	if (mult_adj == 0) {
+		return;
+	} else if (mult_adj == -1) {
 		interval = -interval;
-		offset  = -offset;
+		offset = -offset;
+	} else if (mult_adj != 1) {
+		interval *= mult_adj;
+		offset *= mult_adj;
 	}
-	mult_adj <<= adj_scale;
-	interval <<= adj_scale;
-	offset <<= adj_scale;
 
 	/*
 	 * So the following can be confusing.
@@ -1860,8 +1866,6 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
 	 *	xtime_nsec_2 = xtime_nsec_1 - offset
 	 * Which simplfies to:
 	 *	xtime_nsec -= offset
-	 *
-	 * XXX - TODO: Doc ntp_error calculation.
 	 */
 	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
 		/* NTP adjustment caused clocksource mult overflow */
@@ -1872,89 +1876,38 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
 	tk->tkr_mono.mult += mult_adj;
 	tk->xtime_interval += interval;
 	tk->tkr_mono.xtime_nsec -= offset;
-	tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
 }
 
 /*
- * Calculate the multiplier adjustment needed to match the frequency
- * specified by NTP
+ * Adjust the timekeeper's multiplier to the correct frequency
+ * and also to reduce the accumulated error value.
  */
-static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
-							s64 offset)
+static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 {
-	s64 interval = tk->cycle_interval;
-	s64 xinterval = tk->xtime_interval;
-	u32 base = tk->tkr_mono.clock->mult;
-	u32 max = tk->tkr_mono.clock->maxadj;
-	u32 cur_adj = tk->tkr_mono.mult;
-	s64 tick_error;
-	bool negative;
-	u32 adj_scale;
-
-	/* Remove any current error adj from freq calculation */
-	if (tk->ntp_err_mult)
-		xinterval -= tk->cycle_interval;
-
-	tk->ntp_tick = ntp_tick_length();
-
-	/* Calculate current error per tick */
-	tick_error = ntp_tick_length() >> tk->ntp_error_shift;
-	tick_error -= (xinterval + tk->xtime_remainder);
-
-	/* Don't worry about correcting it if its small */
-	if (likely((tick_error >= 0) && (tick_error <= interval)))
-		return;
-
-	/* preserve the direction of correction */
-	negative = (tick_error < 0);
+	u32 mult;
 
-	/* If any adjustment would pass the max, just return */
-	if (negative && (cur_adj - 1) <= (base - max))
-		return;
-	if (!negative && (cur_adj + 1) >= (base + max))
-		return;
 	/*
-	 * Sort out the magnitude of the correction, but
-	 * avoid making so large a correction that we go
-	 * over the max adjustment.
+	 * Determine the multiplier from the current NTP tick length.
+	 * Avoid expensive division when the tick length doesn't change.
 	 */
-	adj_scale = 0;
-	tick_error = abs(tick_error);
-	while (tick_error > interval) {
-		u32 adj = 1 << (adj_scale + 1);
-
-		/* Check if adjustment gets us within 1 unit from the max */
-		if (negative && (cur_adj - adj) <= (base - max))
-			break;
-		if (!negative && (cur_adj + adj) >= (base + max))
-			break;
-
-		adj_scale++;
-		tick_error >>= 1;
+	if (likely(tk->ntp_tick == ntp_tick_length())) {
+		mult = tk->tkr_mono.mult - tk->ntp_err_mult;
+	} else {
+		tk->ntp_tick = ntp_tick_length();
+		mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
+				 tk->xtime_remainder, tk->cycle_interval);
 	}
 
-	/* scale the corrections */
-	timekeeping_apply_adjustment(tk, offset, negative, adj_scale);
-}
+	/*
+	 * If the clock is behind the NTP time, increase the multiplier by 1
+	 * to catch up with it. If it's ahead and there was a remainder in the
+	 * tick division, the clock will slow down. Otherwise it will stay
+	 * ahead until the tick length changes to a non-divisible value.
+	 */
+	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
+	mult += tk->ntp_err_mult;
 
-/*
- * Adjust the timekeeper's multiplier to the correct frequency
- * and also to reduce the accumulated error value.
- */
-static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
-{
-	/* Correct for the current frequency error */
-	timekeeping_freqadjust(tk, offset);
-
-	/* Next make a small adjustment to fix any cumulative error */
-	if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
-		tk->ntp_err_mult = 1;
-		timekeeping_apply_adjustment(tk, offset, 0, 0);
-	} else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
-		/* Undo any existing error adjustment */
-		timekeeping_apply_adjustment(tk, offset, 1, 0);
-		tk->ntp_err_mult = 0;
-	}
+	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
 
 	if (unlikely(tk->tkr_mono.clock->maxadj &&
 		(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
@@ -1971,18 +1924,15 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 	 * in the code above, its possible the required corrective factor to
 	 * xtime_nsec could cause it to underflow.
 	 *
-	 * Now, since we already accumulated the second, cannot simply roll
-	 * the accumulated second back, since the NTP subsystem has been
-	 * notified via second_overflow. So instead we push xtime_nsec forward
-	 * by the amount we underflowed, and add that amount into the error.
-	 *
-	 * We'll correct this error next time through this function, when
-	 * xtime_nsec is not as small.
+	 * Now, since we have already accumulated the second and the NTP
+	 * subsystem has been notified via second_overflow(), we need to skip
+	 * the next update.
 	 */
 	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
-		s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
-		tk->tkr_mono.xtime_nsec = 0;
-		tk->ntp_error += neg << tk->ntp_error_shift;
+		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
+							tk->tkr_mono.shift;
+		tk->xtime_sec--;
+		tk->skip_second_overflow = 1;
 	}
 }
 
@@ -2005,6 +1955,15 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 		tk->tkr_mono.xtime_nsec -= nsecps;
 		tk->xtime_sec++;
 
+		/*
+		 * Skip NTP update if this second was accumulated before,
+		 * i.e. xtime_nsec underflowed in timekeeping_adjust()
+		 */
+		if (unlikely(tk->skip_second_overflow)) {
+			tk->skip_second_overflow = 0;
+			continue;
+		}
+
 		/* Figure out if its a leap sec and apply if needed */
 		leap = second_overflow(tk->xtime_sec);
 		if (unlikely(leap)) {
@@ -2121,7 +2080,7 @@ void update_wall_time(void)
 			shift--;
 	}
 
-	/* correct the clock when NTP error is too big */
+	/* Adjust the multiplier to correct NTP error */
 	timekeeping_adjust(tk, offset);
 
 	/*
@@ -2166,7 +2125,7 @@ out:
 void getboottime64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
+	ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended);
 
 	*ts = ktime_to_timespec64(t);
 }
@@ -2236,7 +2195,6 @@ void do_timer(unsigned long ticks)
  * ktime_get_update_offsets_now - hrtimer helper
  * @cwsseq:	pointer to check and store the clock was set sequence number
  * @offs_real:	pointer to storage for monotonic -> realtime offset
- * @offs_boot:	pointer to storage for monotonic -> boottime offset
  * @offs_tai:	pointer to storage for monotonic -> clock tai offset
  *
  * Returns current monotonic time and updates the offsets if the
@@ -2246,7 +2204,7 @@ void do_timer(unsigned long ticks)
  * Called from hrtimer_interrupt() or retrigger_next_event()
  */
 ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
-				     ktime_t *offs_boot, ktime_t *offs_tai)
+				     ktime_t *offs_tai)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
@@ -2263,7 +2221,6 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 		if (*cwsseq != tk->clock_was_set_seq) {
 			*cwsseq = tk->clock_was_set_seq;
 			*offs_real = tk->offs_real;
-			*offs_boot = tk->offs_boot;
 			*offs_tai = tk->offs_tai;
 		}
 
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb7a1d5..79b67f5e0343 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -6,7 +6,6 @@
  */
 extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
 					    ktime_t *offs_real,
-					    ktime_t *offs_boot,
 					    ktime_t *offs_tai);
 
 extern int timekeeping_valid_for_hres(void);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 20a2300ae4e8..300f4ea39646 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1164,7 +1164,7 @@ static struct {
 	{ trace_clock,			"perf",		1 },
 	{ ktime_get_mono_fast_ns,	"mono",		1 },
 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
-	{ ktime_get_boot_fast_ns,	"boot",		1 },
+	{ ktime_get_mono_fast_ns,	"boot",		1 },
 	ARCH_TRACE_CLOCKS
 };
 
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 9f79547d1b97..f0a2934605bf 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -71,6 +71,12 @@ static void __exit libcrc32c_mod_fini(void)
 	crypto_free_shash(tfm);
 }
 
+const char *crc32c_impl(void)
+{
+	return crypto_shash_driver_name(tfm);
+}
+EXPORT_SYMBOL(crc32c_impl);
+
 module_init(libcrc32c_mod_init);
 module_exit(libcrc32c_mod_fini);
 
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 65792650c630..5abfbf1b8fe2 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -221,6 +221,7 @@ static int symbol_valid(struct sym_entry *s)
 
 	static char *special_prefixes[] = {
 		"__crc_",		/* modversions */
+		"__efistub_",		/* arm64 EFI stub namespace */
 		NULL };
 
 	static char *special_suffixes[] = {
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index c74ecc6504e8..191eb949d52c 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -368,6 +368,11 @@ if ($arch eq "x86_64") {
 } elsif ($arch eq "microblaze") {
     # Microblaze calls '_mcount' instead of plain 'mcount'.
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+} elsif ($arch eq "riscv") {
+    $function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
+    $type = ".quad";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }